Creation of Cybook 2416 (actually Gen4) repository

2009-12-18 17:10:00 +00:00
commit 76f20f4d40
13791 changed files with 6812321 additions and 0 deletions
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -0,0 +1,19 @@
+#
+# Makefile for the Linux networking core.
+#
+
+obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
+	 gen_stats.o gen_estimator.o
+
+obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
+
+obj-y		     += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
+			neighbour.o rtnetlink.o utils.o link_watch.o filter.o
+
+obj-$(CONFIG_XFRM) += flow.o
+obj-$(CONFIG_SYSFS) += net-sysfs.o
+obj-$(CONFIG_NET_PKTGEN) += pktgen.o
+obj-$(CONFIG_WIRELESS_EXT) += wireless.o
+obj-$(CONFIG_NETPOLL) += netpoll.o
+obj-$(CONFIG_NET_DMA) += user_dma.o
+obj-$(CONFIG_FIB_RULES) += fib_rules.o
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -0,0 +1,535 @@
+/*
+ *	SUCS NET3:
+ *
+ *	Generic datagram handling routines. These are generic for all
+ *	protocols. Possibly a generic IP version on top of these would
+ *	make sense. Not tonight however 8-).
+ *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
+ *	NetROM layer all have identical poll code and mostly
+ *	identical recvmsg() code. So we share it here. The poll was
+ *	shared before but buried in udp.c so I moved it.
+ *
+ *	Authors:	Alan Cox <alan@redhat.com>. (datagram_poll() from old
+ *						     udp.c code)
+ *
+ *	Fixes:
+ *		Alan Cox	:	NULL return from skb_peek_copy()
+ *					understood
+ *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
+ *					skb_peek_copy stuff.
+ *		Alan Cox	:	Added support for SOCK_SEQPACKET.
+ *					IPX can no longer use the SO_TYPE hack
+ *					but AX.25 now works right, and SPX is
+ *					feasible.
+ *		Alan Cox	:	Fixed write poll of non IP protocol
+ *					crash.
+ *		Florian  La Roche:	Changed for my new skbuff handling.
+ *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
+ *		Linus Torvalds	:	BSD semantic fixes.
+ *		Alan Cox	:	Datagram iovec handling
+ *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
+ *		Alan Cox	:	POSIXisms
+ *		Pete Wyckoff    :       Unconnected accept() fix.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/poll.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+
+#include <net/checksum.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+
+/*
+ *	Is a socket 'connection oriented' ?
+ */
+static inline int connection_based(struct sock *sk)
+{
+	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
+}
+
+/*
+ * Wait for a packet..
+ */
+static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
+{
+	int error;
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+	/* Socket errors? */
+	error = sock_error(sk);
+	if (error)
+		goto out_err;
+
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		goto out;
+
+	/* Socket shut down? */
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		goto out_noerr;
+
+	/* Sequenced packets can come disconnected.
+	 * If so we report the problem
+	 */
+	error = -ENOTCONN;
+	if (connection_based(sk) &&
+	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
+		goto out_err;
+
+	/* handle signals */
+	if (signal_pending(current))
+		goto interrupted;
+
+	error = 0;
+	*timeo_p = schedule_timeout(*timeo_p);
+out:
+	finish_wait(sk->sk_sleep, &wait);
+	return error;
+interrupted:
+	error = sock_intr_errno(*timeo_p);
+out_err:
+	*err = error;
+	goto out;
+out_noerr:
+	*err = 0;
+	error = 1;
+	goto out;
+}
+
+/**
+ *	skb_recv_datagram - Receive a datagram skbuff
+ *	@sk: socket
+ *	@flags: MSG_ flags
+ *	@noblock: blocking operation?
+ *	@err: error code returned
+ *
+ *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
+ *	and possible races. This replaces identical code in packet, raw and
+ *	udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
+ *	the long standing peek and read race for datagram sockets. If you
+ *	alter this routine remember it must be re-entrant.
+ *
+ *	This function will lock the socket if a skb is returned, so the caller
+ *	needs to unlock the socket in that case (usually by calling
+ *	skb_free_datagram)
+ *
+ *	* It does not lock socket since today. This function is
+ *	* free of race conditions. This measure should/can improve
+ *	* significantly datagram socket latencies at high loads,
+ *	* when data copying to user space takes lots of time.
+ *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
+ *	*  8) Great win.)
+ *	*			                    --ANK (980729)
+ *
+ *	The order of the tests when we find no data waiting are specified
+ *	quite explicitly by POSIX 1003.1g, don't change them without having
+ *	the standard around please.
+ */
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
+				  int noblock, int *err)
+{
+	struct sk_buff *skb;
+	long timeo;
+	/*
+	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
+	 */
+	int error = sock_error(sk);
+
+	if (error)
+		goto no_packet;
+
+	timeo = sock_rcvtimeo(sk, noblock);
+
+	do {
+		/* Again only user level code calls this function, so nothing
+		 * interrupt level will suddenly eat the receive_queue.
+		 *
+		 * Look at current nfs client by the way...
+		 * However, this function was corrent in any case. 8)
+		 */
+		if (flags & MSG_PEEK) {
+			unsigned long cpu_flags;
+
+			spin_lock_irqsave(&sk->sk_receive_queue.lock,
+					  cpu_flags);
+			skb = skb_peek(&sk->sk_receive_queue);
+			if (skb)
+				atomic_inc(&skb->users);
+			spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
+					       cpu_flags);
+		} else
+			skb = skb_dequeue(&sk->sk_receive_queue);
+
+		if (skb)
+			return skb;
+
+		/* User doesn't want to wait */
+		error = -EAGAIN;
+		if (!timeo)
+			goto no_packet;
+
+	} while (!wait_for_packet(sk, err, &timeo));
+
+	return NULL;
+
+no_packet:
+	*err = error;
+	return NULL;
+}
+
+void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
+{
+	kfree_skb(skb);
+}
+
+/**
+ *	skb_kill_datagram - Free a datagram skbuff forcibly
+ *	@sk: socket
+ *	@skb: datagram skbuff
+ *	@flags: MSG_ flags
+ *
+ *	This function frees a datagram skbuff that was received by
+ *	skb_recv_datagram.  The flags argument must match the one
+ *	used for skb_recv_datagram.
+ *
+ *	If the MSG_PEEK flag is set, and the packet is still on the
+ *	receive queue of the socket, it will be taken off the queue
+ *	before it is freed.
+ *
+ *	This function currently only disables BH when acquiring the
+ *	sk_receive_queue lock.  Therefore it must not be used in a
+ *	context where that lock is acquired in an IRQ context.
+ */
+
+void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
+{
+	if (flags & MSG_PEEK) {
+		spin_lock_bh(&sk->sk_receive_queue.lock);
+		if (skb == skb_peek(&sk->sk_receive_queue)) {
+			__skb_unlink(skb, &sk->sk_receive_queue);
+			atomic_dec(&skb->users);
+		}
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+	}
+
+	kfree_skb(skb);
+}
+
+EXPORT_SYMBOL(skb_kill_datagram);
+
+/**
+ *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
+ *	@skb: buffer to copy
+ *	@offset: offset in the buffer to start copying from
+ *	@to: io vector to copy to
+ *	@len: amount of data to copy from buffer to iovec
+ *
+ *	Note: the iovec is modified during the copy.
+ */
+int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
+			    struct iovec *to, int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		if (memcpy_toiovec(to, skb->data + offset, copy))
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			int err;
+			u8  *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			err = memcpy_toiovec(to, vaddr + frag->page_offset +
+					     offset - start, copy);
+			kunmap(page);
+			if (err)
+				goto fault;
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				if (skb_copy_datagram_iovec(list,
+							    offset - start,
+							    to, copy))
+					goto fault;
+				if ((len -= copy) == 0)
+					return 0;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+
+static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
+				      u8 __user *to, int len,
+				      __wsum *csump)
+{
+	int start = skb_headlen(skb);
+	int pos = 0;
+	int i, copy = start - offset;
+
+	/* Copy header. */
+	if (copy > 0) {
+		int err = 0;
+		if (copy > len)
+			copy = len;
+		*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
+					       *csump, &err);
+		if (err)
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to += copy;
+		pos = copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			__wsum csum2;
+			int err = 0;
+			u8  *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			csum2 = csum_and_copy_to_user(vaddr +
+							frag->page_offset +
+							offset - start,
+						      to, copy, 0, &err);
+			kunmap(page);
+			if (err)
+				goto fault;
+			*csump = csum_block_add(*csump, csum2, pos);
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+			to += copy;
+			pos += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list=list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				__wsum csum2 = 0;
+				if (copy > len)
+					copy = len;
+				if (skb_copy_and_csum_datagram(list,
+							       offset - start,
+							       to, copy,
+							       &csum2))
+					goto fault;
+				*csump = csum_block_add(*csump, csum2, pos);
+				if ((len -= copy) == 0)
+					return 0;
+				offset += copy;
+				to += copy;
+				pos += copy;
+			}
+			start = end;
+		}
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
+{
+	__sum16 sum;
+
+	sum = csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+	if (likely(!sum)) {
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
+			netdev_rx_csum_fault(skb->dev);
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	return sum;
+}
+EXPORT_SYMBOL(__skb_checksum_complete);
+
+/**
+ *	skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec.
+ *	@skb: skbuff
+ *	@hlen: hardware length
+ *	@iov: io vector
+ *
+ *	Caller _must_ check that skb will fit to this iovec.
+ *
+ *	Returns: 0       - success.
+ *		 -EINVAL - checksum failure.
+ *		 -EFAULT - fault during copy. Beware, in this case iovec
+ *			   can be modified!
+ */
+int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
+				     int hlen, struct iovec *iov)
+{
+	__wsum csum;
+	int chunk = skb->len - hlen;
+
+	/* Skip filled elements.
+	 * Pretty silly, look at memcpy_toiovec, though 8)
+	 */
+	while (!iov->iov_len)
+		iov++;
+
+	if (iov->iov_len < chunk) {
+		if (__skb_checksum_complete(skb))
+			goto csum_error;
+		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
+			goto fault;
+	} else {
+		csum = csum_partial(skb->data, hlen, skb->csum);
+		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
+					       chunk, &csum))
+			goto fault;
+		if (csum_fold(csum))
+			goto csum_error;
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
+			netdev_rx_csum_fault(skb->dev);
+		iov->iov_len -= chunk;
+		iov->iov_base += chunk;
+	}
+	return 0;
+csum_error:
+	return -EINVAL;
+fault:
+	return -EFAULT;
+}
+
+/**
+ * 	datagram_poll - generic datagram poll
+ *	@file: file struct
+ *	@sock: socket
+ *	@wait: poll table
+ *
+ *	Datagram poll: Again totally generic. This also handles
+ *	sequenced packet sockets providing the socket receive queue
+ *	is only ever holding data ready to receive.
+ *
+ *	Note: when you _don't_ use this routine for this protocol,
+ *	and you use a different write policy from sock_writeable()
+ *	then please supply your own write_space callback.
+ */
+unsigned int datagram_poll(struct file *file, struct socket *sock,
+			   poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	unsigned int mask;
+
+	poll_wait(file, sk->sk_sleep, wait);
+	mask = 0;
+
+	/* exceptional events? */
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+		mask |= POLLERR;
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		mask |= POLLRDHUP;
+	if (sk->sk_shutdown == SHUTDOWN_MASK)
+		mask |= POLLHUP;
+
+	/* readable? */
+	if (!skb_queue_empty(&sk->sk_receive_queue) ||
+	    (sk->sk_shutdown & RCV_SHUTDOWN))
+		mask |= POLLIN | POLLRDNORM;
+
+	/* Connection-based need to check for termination and startup */
+	if (connection_based(sk)) {
+		if (sk->sk_state == TCP_CLOSE)
+			mask |= POLLHUP;
+		/* connection hasn't started yet? */
+		if (sk->sk_state == TCP_SYN_SENT)
+			return mask;
+	}
+
+	/* writable? */
+	if (sock_writeable(sk))
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+	else
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+	return mask;
+}
+
+EXPORT_SYMBOL(datagram_poll);
+EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
+EXPORT_SYMBOL(skb_copy_datagram_iovec);
+EXPORT_SYMBOL(skb_free_datagram);
+EXPORT_SYMBOL(skb_recv_datagram);
--- a/net/core/dev.c
+++ b/net/core/dev.c
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -0,0 +1,296 @@
+/*
+ *	Linux NET3:	Multicast List maintenance.
+ *
+ *	Authors:
+ *		Tim Kordas <tjk@nostromo.eeap.cwru.edu>
+ *		Richard Underwood <richard@wuzz.demon.co.uk>
+ *
+ *	Stir fried together from the IP multicast and CAP patches above
+ *		Alan Cox <Alan.Cox@linux.org>
+ *
+ *	Fixes:
+ *		Alan Cox	:	Update the device on a real delete
+ *					rather than any time but...
+ *		Alan Cox	:	IFF_ALLMULTI support.
+ *		Alan Cox	: 	New format set_multicast_list() calls.
+ *		Gleb Natapov    :       Remove dev_mc_lock.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/arp.h>
+
+
+/*
+ *	Device multicast list maintenance.
+ *
+ *	This is used both by IP and by the user level maintenance functions.
+ *	Unlike BSD we maintain a usage count on a given multicast address so
+ *	that a casual user application can add/delete multicasts used by
+ *	protocols without doing damage to the protocols when it deletes the
+ *	entries. It also helps IP as it tracks overlapping maps.
+ *
+ *	Device mc lists are changed by bh at least if IPv6 is enabled,
+ *	so that it must be bh protected.
+ *
+ *	We block accesses to device mc filters with netif_tx_lock.
+ */
+
+/*
+ *	Update the multicast list into the physical NIC controller.
+ */
+
+static void __dev_mc_upload(struct net_device *dev)
+{
+	/* Don't do anything till we up the interface
+	 * [dev_open will call this function so the list will
+	 * stay sane]
+	 */
+
+	if (!(dev->flags&IFF_UP))
+		return;
+
+	/*
+	 *	Devices with no set multicast or which have been
+	 *	detached don't get set.
+	 */
+
+	if (dev->set_multicast_list == NULL ||
+	    !netif_device_present(dev))
+		return;
+
+	dev->set_multicast_list(dev);
+}
+
+void dev_mc_upload(struct net_device *dev)
+{
+	netif_tx_lock_bh(dev);
+	__dev_mc_upload(dev);
+	netif_tx_unlock_bh(dev);
+}
+
+/*
+ *	Delete a device level multicast
+ */
+
+int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
+{
+	int err = 0;
+	struct dev_mc_list *dmi, **dmip;
+
+	netif_tx_lock_bh(dev);
+
+	for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
+		/*
+		 *	Find the entry we want to delete. The device could
+		 *	have variable length entries so check these too.
+		 */
+		if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
+		    alen == dmi->dmi_addrlen) {
+			if (glbl) {
+				int old_glbl = dmi->dmi_gusers;
+				dmi->dmi_gusers = 0;
+				if (old_glbl == 0)
+					break;
+			}
+			if (--dmi->dmi_users)
+				goto done;
+
+			/*
+			 *	Last user. So delete the entry.
+			 */
+			*dmip = dmi->next;
+			dev->mc_count--;
+
+			kfree(dmi);
+
+			/*
+			 *	We have altered the list, so the card
+			 *	loaded filter is now wrong. Fix it
+			 */
+			__dev_mc_upload(dev);
+
+			netif_tx_unlock_bh(dev);
+			return 0;
+		}
+	}
+	err = -ENOENT;
+done:
+	netif_tx_unlock_bh(dev);
+	return err;
+}
+
+/*
+ *	Add a device level multicast
+ */
+
+int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
+{
+	int err = 0;
+	struct dev_mc_list *dmi, *dmi1;
+
+	dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC);
+
+	netif_tx_lock_bh(dev);
+	for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
+		if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
+		    dmi->dmi_addrlen == alen) {
+			if (glbl) {
+				int old_glbl = dmi->dmi_gusers;
+				dmi->dmi_gusers = 1;
+				if (old_glbl)
+					goto done;
+			}
+			dmi->dmi_users++;
+			goto done;
+		}
+	}
+
+	if ((dmi = dmi1) == NULL) {
+		netif_tx_unlock_bh(dev);
+		return -ENOMEM;
+	}
+	memcpy(dmi->dmi_addr, addr, alen);
+	dmi->dmi_addrlen = alen;
+	dmi->next = dev->mc_list;
+	dmi->dmi_users = 1;
+	dmi->dmi_gusers = glbl ? 1 : 0;
+	dev->mc_list = dmi;
+	dev->mc_count++;
+
+	__dev_mc_upload(dev);
+
+	netif_tx_unlock_bh(dev);
+	return 0;
+
+done:
+	netif_tx_unlock_bh(dev);
+	kfree(dmi1);
+	return err;
+}
+
+/*
+ *	Discard multicast list when a device is downed
+ */
+
+void dev_mc_discard(struct net_device *dev)
+{
+	netif_tx_lock_bh(dev);
+
+	while (dev->mc_list != NULL) {
+		struct dev_mc_list *tmp = dev->mc_list;
+		dev->mc_list = tmp->next;
+		if (tmp->dmi_users > tmp->dmi_gusers)
+			printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users);
+		kfree(tmp);
+	}
+	dev->mc_count = 0;
+
+	netif_tx_unlock_bh(dev);
+}
+
+#ifdef CONFIG_PROC_FS
+static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct net_device *dev;
+	loff_t off = 0;
+
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev; dev = dev->next) {
+		if (off++ == *pos)
+			return dev;
+	}
+	return NULL;
+}
+
+static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct net_device *dev = v;
+	++*pos;
+	return dev->next;
+}
+
+static void dev_mc_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&dev_base_lock);
+}
+
+
+static int dev_mc_seq_show(struct seq_file *seq, void *v)
+{
+	struct dev_mc_list *m;
+	struct net_device *dev = v;
+
+	netif_tx_lock_bh(dev);
+	for (m = dev->mc_list; m; m = m->next) {
+		int i;
+
+		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
+			   dev->name, m->dmi_users, m->dmi_gusers);
+
+		for (i = 0; i < m->dmi_addrlen; i++)
+			seq_printf(seq, "%02x", m->dmi_addr[i]);
+
+		seq_putc(seq, '\n');
+	}
+	netif_tx_unlock_bh(dev);
+	return 0;
+}
+
+static struct seq_operations dev_mc_seq_ops = {
+	.start = dev_mc_seq_start,
+	.next  = dev_mc_seq_next,
+	.stop  = dev_mc_seq_stop,
+	.show  = dev_mc_seq_show,
+};
+
+static int dev_mc_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &dev_mc_seq_ops);
+}
+
+static const struct file_operations dev_mc_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = dev_mc_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+#endif
+
+void __init dev_mcast_init(void)
+{
+	proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops);
+}
+
+EXPORT_SYMBOL(dev_mc_add);
+EXPORT_SYMBOL(dev_mc_delete);
+EXPORT_SYMBOL(dev_mc_upload);
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -0,0 +1,288 @@
+/*
+ * net/core/dst.c	Protocol independent destination cache.
+ *
+ * Authors:		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <net/dst.h>
+
+/* Locking strategy:
+ * 1) Garbage collection state of dead destination cache
+ *    entries is protected by dst_lock.
+ * 2) GC is run only from BH context, and is the only remover
+ *    of entries.
+ * 3) Entries are added to the garbage list from both BH
+ *    and non-BH context, so local BH disabling is needed.
+ * 4) All operations modify state, so a spinlock is used.
+ */
+static struct dst_entry 	*dst_garbage_list;
+#if RT_CACHE_DEBUG >= 2
+static atomic_t			 dst_total = ATOMIC_INIT(0);
+#endif
+static DEFINE_SPINLOCK(dst_lock);
+
+static unsigned long dst_gc_timer_expires;
+static unsigned long dst_gc_timer_inc = DST_GC_MAX;
+static void dst_run_gc(unsigned long);
+static void ___dst_free(struct dst_entry * dst);
+
+static DEFINE_TIMER(dst_gc_timer, dst_run_gc, DST_GC_MIN, 0);
+
+static void dst_run_gc(unsigned long dummy)
+{
+	int    delayed = 0;
+	int    work_performed;
+	struct dst_entry * dst, **dstp;
+
+	if (!spin_trylock(&dst_lock)) {
+		mod_timer(&dst_gc_timer, jiffies + HZ/10);
+		return;
+	}
+
+	del_timer(&dst_gc_timer);
+	dstp = &dst_garbage_list;
+	work_performed = 0;
+	while ((dst = *dstp) != NULL) {
+		if (atomic_read(&dst->__refcnt)) {
+			dstp = &dst->next;
+			delayed++;
+			continue;
+		}
+		*dstp = dst->next;
+		work_performed = 1;
+
+		dst = dst_destroy(dst);
+		if (dst) {
+			/* NOHASH and still referenced. Unless it is already
+			 * on gc list, invalidate it and add to gc list.
+			 *
+			 * Note: this is temporary. Actually, NOHASH dst's
+			 * must be obsoleted when parent is obsoleted.
+			 * But we do not have state "obsoleted, but
+			 * referenced by parent", so it is right.
+			 */
+			if (dst->obsolete > 1)
+				continue;
+
+			___dst_free(dst);
+			dst->next = *dstp;
+			*dstp = dst;
+			dstp = &dst->next;
+		}
+	}
+	if (!dst_garbage_list) {
+		dst_gc_timer_inc = DST_GC_MAX;
+		goto out;
+	}
+	if (!work_performed) {
+		if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
+			dst_gc_timer_expires = DST_GC_MAX;
+		dst_gc_timer_inc += DST_GC_INC;
+	} else {
+		dst_gc_timer_inc = DST_GC_INC;
+		dst_gc_timer_expires = DST_GC_MIN;
+	}
+#if RT_CACHE_DEBUG >= 2
+	printk("dst_total: %d/%d %ld\n",
+	       atomic_read(&dst_total), delayed,  dst_gc_timer_expires);
+#endif
+	/* if the next desired timer is more than 4 seconds in the future
+	 * then round the timer to whole seconds
+	 */
+	if (dst_gc_timer_expires > 4*HZ)
+		mod_timer(&dst_gc_timer,
+			round_jiffies(jiffies + dst_gc_timer_expires));
+	else
+		mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
+
+out:
+	spin_unlock(&dst_lock);
+}
+
+static int dst_discard_in(struct sk_buff *skb)
+{
+	kfree_skb(skb);
+	return 0;
+}
+
+static int dst_discard_out(struct sk_buff *skb)
+{
+	kfree_skb(skb);
+	return 0;
+}
+
+void * dst_alloc(struct dst_ops * ops)
+{
+	struct dst_entry * dst;
+
+	if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
+		if (ops->gc())
+			return NULL;
+	}
+	dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC);
+	if (!dst)
+		return NULL;
+	atomic_set(&dst->__refcnt, 0);
+	dst->ops = ops;
+	dst->lastuse = jiffies;
+	dst->path = dst;
+	dst->input = dst_discard_in;
+	dst->output = dst_discard_out;
+#if RT_CACHE_DEBUG >= 2
+	atomic_inc(&dst_total);
+#endif
+	atomic_inc(&ops->entries);
+	return dst;
+}
+
+static void ___dst_free(struct dst_entry * dst)
+{
+	/* The first case (dev==NULL) is required, when
+	   protocol module is unloaded.
+	 */
+	if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
+		dst->input = dst_discard_in;
+		dst->output = dst_discard_out;
+	}
+	dst->obsolete = 2;
+}
+
+void __dst_free(struct dst_entry * dst)
+{
+	spin_lock_bh(&dst_lock);
+	___dst_free(dst);
+	dst->next = dst_garbage_list;
+	dst_garbage_list = dst;
+	if (dst_gc_timer_inc > DST_GC_INC) {
+		dst_gc_timer_inc = DST_GC_INC;
+		dst_gc_timer_expires = DST_GC_MIN;
+		mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
+	}
+	spin_unlock_bh(&dst_lock);
+}
+
+struct dst_entry *dst_destroy(struct dst_entry * dst)
+{
+	struct dst_entry *child;
+	struct neighbour *neigh;
+	struct hh_cache *hh;
+
+	smp_rmb();
+
+again:
+	neigh = dst->neighbour;
+	hh = dst->hh;
+	child = dst->child;
+
+	dst->hh = NULL;
+	if (hh && atomic_dec_and_test(&hh->hh_refcnt))
+		kfree(hh);
+
+	if (neigh) {
+		dst->neighbour = NULL;
+		neigh_release(neigh);
+	}
+
+	atomic_dec(&dst->ops->entries);
+
+	if (dst->ops->destroy)
+		dst->ops->destroy(dst);
+	if (dst->dev)
+		dev_put(dst->dev);
+#if RT_CACHE_DEBUG >= 2
+	atomic_dec(&dst_total);
+#endif
+	kmem_cache_free(dst->ops->kmem_cachep, dst);
+
+	dst = child;
+	if (dst) {
+		int nohash = dst->flags & DST_NOHASH;
+
+		if (atomic_dec_and_test(&dst->__refcnt)) {
+			/* We were real parent of this dst, so kill child. */
+			if (nohash)
+				goto again;
+		} else {
+			/* Child is still referenced, return it for freeing. */
+			if (nohash)
+				return dst;
+			/* Child is still in his hash table */
+		}
+	}
+	return NULL;
+}
+
+/* Dirty hack. We did it in 2.2 (in __dst_free),
+ * we have _very_ good reasons not to repeat
+ * this mistake in 2.3, but we have no choice
+ * now. _It_ _is_ _explicit_ _deliberate_
+ * _race_ _condition_.
+ *
+ * Commented and originally written by Alexey.
+ */
+static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+			      int unregister)
+{
+	if (dst->ops->ifdown)
+		dst->ops->ifdown(dst, dev, unregister);
+
+	if (dev != dst->dev)
+		return;
+
+	if (!unregister) {
+		dst->input = dst_discard_in;
+		dst->output = dst_discard_out;
+	} else {
+		dst->dev = &loopback_dev;
+		dev_hold(&loopback_dev);
+		dev_put(dev);
+		if (dst->neighbour && dst->neighbour->dev == dev) {
+			dst->neighbour->dev = &loopback_dev;
+			dev_put(dev);
+			dev_hold(&loopback_dev);
+		}
+	}
+}
+
+static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct dst_entry *dst;
+
+	switch (event) {
+	case NETDEV_UNREGISTER:
+	case NETDEV_DOWN:
+		spin_lock_bh(&dst_lock);
+		for (dst = dst_garbage_list; dst; dst = dst->next) {
+			dst_ifdown(dst, dev, event != NETDEV_DOWN);
+		}
+		spin_unlock_bh(&dst_lock);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block dst_dev_notifier = {
+	.notifier_call	= dst_dev_event,
+};
+
+void __init dst_init(void)
+{
+	register_netdevice_notifier(&dst_dev_notifier);
+}
+
+EXPORT_SYMBOL(__dst_free);
+EXPORT_SYMBOL(dst_alloc);
+EXPORT_SYMBOL(dst_destroy);
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -0,0 +1,984 @@
+/*
+ * net/core/ethtool.c - Ethtool ioctl handler
+ * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
+ *
+ * This file is where we call all the ethtool_ops commands to get
+ * the information ethtool needs.  We fall back to calling do_ioctl()
+ * for drivers which haven't been converted to ethtool_ops yet.
+ *
+ * It's GPL, stupid.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <asm/uaccess.h>
+
+/*
+ * Some useful ethtool_ops methods that're device independent.
+ * If we find that all drivers want to do the same thing here,
+ * we can turn these into dev_() function calls.
+ */
+
+u32 ethtool_op_get_link(struct net_device *dev)
+{
+	return netif_carrier_ok(dev) ? 1 : 0;
+}
+
+u32 ethtool_op_get_tx_csum(struct net_device *dev)
+{
+	return (dev->features & NETIF_F_ALL_CSUM) != 0;
+}
+
+int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
+{
+	if (data)
+		dev->features |= NETIF_F_IP_CSUM;
+	else
+		dev->features &= ~NETIF_F_IP_CSUM;
+
+	return 0;
+}
+
+int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
+{
+	if (data)
+		dev->features |= NETIF_F_HW_CSUM;
+	else
+		dev->features &= ~NETIF_F_HW_CSUM;
+
+	return 0;
+}
+u32 ethtool_op_get_sg(struct net_device *dev)
+{
+	return (dev->features & NETIF_F_SG) != 0;
+}
+
+int ethtool_op_set_sg(struct net_device *dev, u32 data)
+{
+	if (data)
+		dev->features |= NETIF_F_SG;
+	else
+		dev->features &= ~NETIF_F_SG;
+
+	return 0;
+}
+
+u32 ethtool_op_get_tso(struct net_device *dev)
+{
+	return (dev->features & NETIF_F_TSO) != 0;
+}
+
+int ethtool_op_set_tso(struct net_device *dev, u32 data)
+{
+	if (data)
+		dev->features |= NETIF_F_TSO;
+	else
+		dev->features &= ~NETIF_F_TSO;
+
+	return 0;
+}
+
+int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data)
+{
+	unsigned char len = dev->addr_len;
+	if ( addr->size < len )
+		return -ETOOSMALL;
+
+	addr->size = len;
+	memcpy(data, dev->perm_addr, len);
+	return 0;
+}
+
+
+u32 ethtool_op_get_ufo(struct net_device *dev)
+{
+	return (dev->features & NETIF_F_UFO) != 0;
+}
+
+int ethtool_op_set_ufo(struct net_device *dev, u32 data)
+{
+	if (data)
+		dev->features |= NETIF_F_UFO;
+	else
+		dev->features &= ~NETIF_F_UFO;
+	return 0;
+}
+
+/* Handlers for each ethtool command */
+
+static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_cmd cmd = { ETHTOOL_GSET };
+	int err;
+
+	if (!dev->ethtool_ops->get_settings)
+		return -EOPNOTSUPP;
+
+	err = dev->ethtool_ops->get_settings(dev, &cmd);
+	if (err < 0)
+		return err;
+
+	if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_cmd cmd;
+
+	if (!dev->ethtool_ops->set_settings)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_settings(dev, &cmd);
+}
+
+static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_drvinfo info;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+
+	if (!ops->get_drvinfo)
+		return -EOPNOTSUPP;
+
+	memset(&info, 0, sizeof(info));
+	info.cmd = ETHTOOL_GDRVINFO;
+	ops->get_drvinfo(dev, &info);
+
+	if (ops->self_test_count)
+		info.testinfo_len = ops->self_test_count(dev);
+	if (ops->get_stats_count)
+		info.n_stats = ops->get_stats_count(dev);
+	if (ops->get_regs_len)
+		info.regdump_len = ops->get_regs_len(dev);
+	if (ops->get_eeprom_len)
+		info.eedump_len = ops->get_eeprom_len(dev);
+
+	if (copy_to_user(useraddr, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_regs regs;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	void *regbuf;
+	int reglen, ret;
+
+	if (!ops->get_regs || !ops->get_regs_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&regs, useraddr, sizeof(regs)))
+		return -EFAULT;
+
+	reglen = ops->get_regs_len(dev);
+	if (regs.len > reglen)
+		regs.len = reglen;
+
+	regbuf = kmalloc(reglen, GFP_USER);
+	if (!regbuf)
+		return -ENOMEM;
+
+	ops->get_regs(dev, &regs, regbuf);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &regs, sizeof(regs)))
+		goto out;
+	useraddr += offsetof(struct ethtool_regs, data);
+	if (copy_to_user(useraddr, regbuf, regs.len))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(regbuf);
+	return ret;
+}
+
+static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
+
+	if (!dev->ethtool_ops->get_wol)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_wol(dev, &wol);
+
+	if (copy_to_user(useraddr, &wol, sizeof(wol)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_wolinfo wol;
+
+	if (!dev->ethtool_ops->set_wol)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&wol, useraddr, sizeof(wol)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_wol(dev, &wol);
+}
+
+static int ethtool_get_msglevel(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GMSGLVL };
+
+	if (!dev->ethtool_ops->get_msglevel)
+		return -EOPNOTSUPP;
+
+	edata.data = dev->ethtool_ops->get_msglevel(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_msglevel(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!dev->ethtool_ops->set_msglevel)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	dev->ethtool_ops->set_msglevel(dev, edata.data);
+	return 0;
+}
+
+static int ethtool_nway_reset(struct net_device *dev)
+{
+	if (!dev->ethtool_ops->nway_reset)
+		return -EOPNOTSUPP;
+
+	return dev->ethtool_ops->nway_reset(dev);
+}
+
+static int ethtool_get_link(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GLINK };
+
+	if (!dev->ethtool_ops->get_link)
+		return -EOPNOTSUPP;
+
+	edata.data = dev->ethtool_ops->get_link(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_eeprom eeprom;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	u8 *data;
+	int ret;
+
+	if (!ops->get_eeprom || !ops->get_eeprom_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
+		return -EFAULT;
+
+	/* Check for wrap and zero */
+	if (eeprom.offset + eeprom.len <= eeprom.offset)
+		return -EINVAL;
+
+	/* Check for exceeding total eeprom len */
+	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
+		return -EINVAL;
+
+	data = kmalloc(eeprom.len, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ret = -EFAULT;
+	if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
+		goto out;
+
+	ret = ops->get_eeprom(dev, &eeprom, data);
+	if (ret)
+		goto out;
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
+		goto out;
+	if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_eeprom eeprom;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	u8 *data;
+	int ret;
+
+	if (!ops->set_eeprom || !ops->get_eeprom_len)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
+		return -EFAULT;
+
+	/* Check for wrap and zero */
+	if (eeprom.offset + eeprom.len <= eeprom.offset)
+		return -EINVAL;
+
+	/* Check for exceeding total eeprom len */
+	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
+		return -EINVAL;
+
+	data = kmalloc(eeprom.len, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ret = -EFAULT;
+	if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
+		goto out;
+
+	ret = ops->set_eeprom(dev, &eeprom, data);
+	if (ret)
+		goto out;
+
+	if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
+		ret = -EFAULT;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE };
+
+	if (!dev->ethtool_ops->get_coalesce)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_coalesce(dev, &coalesce);
+
+	if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_coalesce coalesce;
+
+	if (!dev->ethtool_ops->set_coalesce)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_coalesce(dev, &coalesce);
+}
+
+static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM };
+
+	if (!dev->ethtool_ops->get_ringparam)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_ringparam(dev, &ringparam);
+
+	if (copy_to_user(useraddr, &ringparam, sizeof(ringparam)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_ringparam ringparam;
+
+	if (!dev->ethtool_ops->set_ringparam)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_ringparam(dev, &ringparam);
+}
+
+static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
+
+	if (!dev->ethtool_ops->get_pauseparam)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_pauseparam(dev, &pauseparam);
+
+	if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_pauseparam pauseparam;
+
+	if (!dev->ethtool_ops->set_pauseparam)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
+}
+
+static int ethtool_get_rx_csum(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GRXCSUM };
+
+	if (!dev->ethtool_ops->get_rx_csum)
+		return -EOPNOTSUPP;
+
+	edata.data = dev->ethtool_ops->get_rx_csum(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!dev->ethtool_ops->set_rx_csum)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	dev->ethtool_ops->set_rx_csum(dev, edata.data);
+	return 0;
+}
+
+static int ethtool_get_tx_csum(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GTXCSUM };
+
+	if (!dev->ethtool_ops->get_tx_csum)
+		return -EOPNOTSUPP;
+
+	edata.data = dev->ethtool_ops->get_tx_csum(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int __ethtool_set_sg(struct net_device *dev, u32 data)
+{
+	int err;
+
+	if (!data && dev->ethtool_ops->set_tso) {
+		err = dev->ethtool_ops->set_tso(dev, 0);
+		if (err)
+			return err;
+	}
+
+	if (!data && dev->ethtool_ops->set_ufo) {
+		err = dev->ethtool_ops->set_ufo(dev, 0);
+		if (err)
+			return err;
+	}
+	return dev->ethtool_ops->set_sg(dev, data);
+}
+
+static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+	int err;
+
+	if (!dev->ethtool_ops->set_tx_csum)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	if (!edata.data && dev->ethtool_ops->set_sg) {
+		err = __ethtool_set_sg(dev, 0);
+		if (err)
+			return err;
+	}
+
+	return dev->ethtool_ops->set_tx_csum(dev, edata.data);
+}
+
+static int ethtool_get_sg(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GSG };
+
+	if (!dev->ethtool_ops->get_sg)
+		return -EOPNOTSUPP;
+
+	edata.data = dev->ethtool_ops->get_sg(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!dev->ethtool_ops->set_sg)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	if (edata.data &&
+	    !(dev->features & NETIF_F_ALL_CSUM))
+		return -EINVAL;
+
+	return __ethtool_set_sg(dev, edata.data);
+}
+
+static int ethtool_get_tso(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GTSO };
+
+	if (!dev->ethtool_ops->get_tso)
+		return -EOPNOTSUPP;
+
+	edata.data = dev->ethtool_ops->get_tso(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!dev->ethtool_ops->set_tso)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+
+	if (edata.data && !(dev->features & NETIF_F_SG))
+		return -EINVAL;
+
+	return dev->ethtool_ops->set_tso(dev, edata.data);
+}
+
+static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GUFO };
+
+	if (!dev->ethtool_ops->get_ufo)
+		return -EOPNOTSUPP;
+	edata.data = dev->ethtool_ops->get_ufo(dev);
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		 return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (!dev->ethtool_ops->set_ufo)
+		return -EOPNOTSUPP;
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+	if (edata.data && !(dev->features & NETIF_F_SG))
+		return -EINVAL;
+	if (edata.data && !(dev->features & NETIF_F_HW_CSUM))
+		return -EINVAL;
+	return dev->ethtool_ops->set_ufo(dev, edata.data);
+}
+
+static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GGSO };
+
+	edata.data = dev->features & NETIF_F_GSO;
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		 return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+	if (edata.data)
+		dev->features |= NETIF_F_GSO;
+	else
+		dev->features &= ~NETIF_F_GSO;
+	return 0;
+}
+
+static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_test test;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	u64 *data;
+	int ret;
+
+	if (!ops->self_test || !ops->self_test_count)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&test, useraddr, sizeof(test)))
+		return -EFAULT;
+
+	test.len = ops->self_test_count(dev);
+	data = kmalloc(test.len * sizeof(u64), GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ops->self_test(dev, &test, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &test, sizeof(test)))
+		goto out;
+	useraddr += sizeof(test);
+	if (copy_to_user(useraddr, data, test.len * sizeof(u64)))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_gstrings gstrings;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	u8 *data;
+	int ret;
+
+	if (!ops->get_strings)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
+		return -EFAULT;
+
+	switch (gstrings.string_set) {
+	case ETH_SS_TEST:
+		if (!ops->self_test_count)
+			return -EOPNOTSUPP;
+		gstrings.len = ops->self_test_count(dev);
+		break;
+	case ETH_SS_STATS:
+		if (!ops->get_stats_count)
+			return -EOPNOTSUPP;
+		gstrings.len = ops->get_stats_count(dev);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ops->get_strings(dev, gstrings.string_set, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
+		goto out;
+	useraddr += sizeof(gstrings);
+	if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_value id;
+
+	if (!dev->ethtool_ops->phys_id)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&id, useraddr, sizeof(id)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->phys_id(dev, id.data);
+}
+
+static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_stats stats;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	u64 *data;
+	int ret;
+
+	if (!ops->get_ethtool_stats || !ops->get_stats_count)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&stats, useraddr, sizeof(stats)))
+		return -EFAULT;
+
+	stats.n_stats = ops->get_stats_count(dev);
+	data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ops->get_ethtool_stats(dev, &stats, data);
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &stats, sizeof(stats)))
+		goto out;
+	useraddr += sizeof(stats);
+	if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_perm_addr epaddr;
+	u8 *data;
+	int ret;
+
+	if (!dev->ethtool_ops->get_perm_addr)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&epaddr,useraddr,sizeof(epaddr)))
+		return -EFAULT;
+
+	data = kmalloc(epaddr.size, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	ret = dev->ethtool_ops->get_perm_addr(dev,&epaddr,data);
+	if (ret)
+		return ret;
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &epaddr, sizeof(epaddr)))
+		goto out;
+	useraddr += sizeof(epaddr);
+	if (copy_to_user(useraddr, data, epaddr.size))
+		goto out;
+	ret = 0;
+
+ out:
+	kfree(data);
+	return ret;
+}
+
+/* The main entry point in this file.  Called from net/core/dev.c */
+
+int dev_ethtool(struct ifreq *ifr)
+{
+	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
+	void __user *useraddr = ifr->ifr_data;
+	u32 ethcmd;
+	int rc;
+	unsigned long old_features;
+
+	if (!dev || !netif_device_present(dev))
+		return -ENODEV;
+
+	if (!dev->ethtool_ops)
+		goto ioctl;
+
+	if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
+		return -EFAULT;
+
+	/* Allow some commands to be done by anyone */
+	switch(ethcmd) {
+	case ETHTOOL_GDRVINFO:
+	case ETHTOOL_GMSGLVL:
+	case ETHTOOL_GCOALESCE:
+	case ETHTOOL_GRINGPARAM:
+	case ETHTOOL_GPAUSEPARAM:
+	case ETHTOOL_GRXCSUM:
+	case ETHTOOL_GTXCSUM:
+	case ETHTOOL_GSG:
+	case ETHTOOL_GSTRINGS:
+	case ETHTOOL_GTSO:
+	case ETHTOOL_GPERMADDR:
+	case ETHTOOL_GUFO:
+	case ETHTOOL_GGSO:
+		break;
+	default:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+	}
+
+	if(dev->ethtool_ops->begin)
+		if ((rc = dev->ethtool_ops->begin(dev)) < 0)
+			return rc;
+
+	old_features = dev->features;
+
+	switch (ethcmd) {
+	case ETHTOOL_GSET:
+		rc = ethtool_get_settings(dev, useraddr);
+		break;
+	case ETHTOOL_SSET:
+		rc = ethtool_set_settings(dev, useraddr);
+		break;
+	case ETHTOOL_GDRVINFO:
+		rc = ethtool_get_drvinfo(dev, useraddr);
+		break;
+	case ETHTOOL_GREGS:
+		rc = ethtool_get_regs(dev, useraddr);
+		break;
+	case ETHTOOL_GWOL:
+		rc = ethtool_get_wol(dev, useraddr);
+		break;
+	case ETHTOOL_SWOL:
+		rc = ethtool_set_wol(dev, useraddr);
+		break;
+	case ETHTOOL_GMSGLVL:
+		rc = ethtool_get_msglevel(dev, useraddr);
+		break;
+	case ETHTOOL_SMSGLVL:
+		rc = ethtool_set_msglevel(dev, useraddr);
+		break;
+	case ETHTOOL_NWAY_RST:
+		rc = ethtool_nway_reset(dev);
+		break;
+	case ETHTOOL_GLINK:
+		rc = ethtool_get_link(dev, useraddr);
+		break;
+	case ETHTOOL_GEEPROM:
+		rc = ethtool_get_eeprom(dev, useraddr);
+		break;
+	case ETHTOOL_SEEPROM:
+		rc = ethtool_set_eeprom(dev, useraddr);
+		break;
+	case ETHTOOL_GCOALESCE:
+		rc = ethtool_get_coalesce(dev, useraddr);
+		break;
+	case ETHTOOL_SCOALESCE:
+		rc = ethtool_set_coalesce(dev, useraddr);
+		break;
+	case ETHTOOL_GRINGPARAM:
+		rc = ethtool_get_ringparam(dev, useraddr);
+		break;
+	case ETHTOOL_SRINGPARAM:
+		rc = ethtool_set_ringparam(dev, useraddr);
+		break;
+	case ETHTOOL_GPAUSEPARAM:
+		rc = ethtool_get_pauseparam(dev, useraddr);
+		break;
+	case ETHTOOL_SPAUSEPARAM:
+		rc = ethtool_set_pauseparam(dev, useraddr);
+		break;
+	case ETHTOOL_GRXCSUM:
+		rc = ethtool_get_rx_csum(dev, useraddr);
+		break;
+	case ETHTOOL_SRXCSUM:
+		rc = ethtool_set_rx_csum(dev, useraddr);
+		break;
+	case ETHTOOL_GTXCSUM:
+		rc = ethtool_get_tx_csum(dev, useraddr);
+		break;
+	case ETHTOOL_STXCSUM:
+		rc = ethtool_set_tx_csum(dev, useraddr);
+		break;
+	case ETHTOOL_GSG:
+		rc = ethtool_get_sg(dev, useraddr);
+		break;
+	case ETHTOOL_SSG:
+		rc = ethtool_set_sg(dev, useraddr);
+		break;
+	case ETHTOOL_GTSO:
+		rc = ethtool_get_tso(dev, useraddr);
+		break;
+	case ETHTOOL_STSO:
+		rc = ethtool_set_tso(dev, useraddr);
+		break;
+	case ETHTOOL_TEST:
+		rc = ethtool_self_test(dev, useraddr);
+		break;
+	case ETHTOOL_GSTRINGS:
+		rc = ethtool_get_strings(dev, useraddr);
+		break;
+	case ETHTOOL_PHYS_ID:
+		rc = ethtool_phys_id(dev, useraddr);
+		break;
+	case ETHTOOL_GSTATS:
+		rc = ethtool_get_stats(dev, useraddr);
+		break;
+	case ETHTOOL_GPERMADDR:
+		rc = ethtool_get_perm_addr(dev, useraddr);
+		break;
+	case ETHTOOL_GUFO:
+		rc = ethtool_get_ufo(dev, useraddr);
+		break;
+	case ETHTOOL_SUFO:
+		rc = ethtool_set_ufo(dev, useraddr);
+		break;
+	case ETHTOOL_GGSO:
+		rc = ethtool_get_gso(dev, useraddr);
+		break;
+	case ETHTOOL_SGSO:
+		rc = ethtool_set_gso(dev, useraddr);
+		break;
+	default:
+		rc =  -EOPNOTSUPP;
+	}
+
+	if(dev->ethtool_ops->complete)
+		dev->ethtool_ops->complete(dev);
+
+	if (old_features != dev->features)
+		netdev_features_change(dev);
+
+	return rc;
+
+ ioctl:
+	/* Keep existing behaviour for the moment.	 */
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (dev->do_ioctl)
+		return dev->do_ioctl(dev, ifr, SIOCETHTOOL);
+	return -EOPNOTSUPP;
+}
+
+EXPORT_SYMBOL(dev_ethtool);
+EXPORT_SYMBOL(ethtool_op_get_link);
+EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr);
+EXPORT_SYMBOL(ethtool_op_get_sg);
+EXPORT_SYMBOL(ethtool_op_get_tso);
+EXPORT_SYMBOL(ethtool_op_get_tx_csum);
+EXPORT_SYMBOL(ethtool_op_set_sg);
+EXPORT_SYMBOL(ethtool_op_set_tso);
+EXPORT_SYMBOL(ethtool_op_set_tx_csum);
+EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
+EXPORT_SYMBOL(ethtool_op_set_ufo);
+EXPORT_SYMBOL(ethtool_op_get_ufo);
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -0,0 +1,507 @@
+/*
+ * net/core/fib_rules.c		Generic Routing Rules
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <net/fib_rules.h>
+
+static LIST_HEAD(rules_ops);
+static DEFINE_SPINLOCK(rules_mod_lock);
+
+static void notify_rule_change(int event, struct fib_rule *rule,
+			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+			       u32 pid);
+
+static struct fib_rules_ops *lookup_rules_ops(int family)
+{
+	struct fib_rules_ops *ops;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ops, &rules_ops, list) {
+		if (ops->family == family) {
+			if (!try_module_get(ops->owner))
+				ops = NULL;
+			rcu_read_unlock();
+			return ops;
+		}
+	}
+	rcu_read_unlock();
+
+	return NULL;
+}
+
+static void rules_ops_put(struct fib_rules_ops *ops)
+{
+	if (ops)
+		module_put(ops->owner);
+}
+
+int fib_rules_register(struct fib_rules_ops *ops)
+{
+	int err = -EEXIST;
+	struct fib_rules_ops *o;
+
+	if (ops->rule_size < sizeof(struct fib_rule))
+		return -EINVAL;
+
+	if (ops->match == NULL || ops->configure == NULL ||
+	    ops->compare == NULL || ops->fill == NULL ||
+	    ops->action == NULL)
+		return -EINVAL;
+
+	spin_lock(&rules_mod_lock);
+	list_for_each_entry(o, &rules_ops, list)
+		if (ops->family == o->family)
+			goto errout;
+
+	list_add_tail_rcu(&ops->list, &rules_ops);
+	err = 0;
+errout:
+	spin_unlock(&rules_mod_lock);
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_register);
+
+static void cleanup_ops(struct fib_rules_ops *ops)
+{
+	struct fib_rule *rule, *tmp;
+
+	list_for_each_entry_safe(rule, tmp, ops->rules_list, list) {
+		list_del_rcu(&rule->list);
+		fib_rule_put(rule);
+	}
+}
+
+int fib_rules_unregister(struct fib_rules_ops *ops)
+{
+	int err = 0;
+	struct fib_rules_ops *o;
+
+	spin_lock(&rules_mod_lock);
+	list_for_each_entry(o, &rules_ops, list) {
+		if (o == ops) {
+			list_del_rcu(&o->list);
+			cleanup_ops(ops);
+			goto out;
+		}
+	}
+
+	err = -ENOENT;
+out:
+	spin_unlock(&rules_mod_lock);
+
+	synchronize_rcu();
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_unregister);
+
+static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
+			  struct flowi *fl, int flags)
+{
+	int ret = 0;
+
+	if (rule->ifindex && (rule->ifindex != fl->iif))
+		goto out;
+
+	if ((rule->mark ^ fl->mark) & rule->mark_mask)
+		goto out;
+
+	ret = ops->match(rule, fl, flags);
+out:
+	return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
+}
+
+int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
+		     int flags, struct fib_lookup_arg *arg)
+{
+	struct fib_rule *rule;
+	int err;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(rule, ops->rules_list, list) {
+		if (!fib_rule_match(rule, ops, fl, flags))
+			continue;
+
+		err = ops->action(rule, fl, flags, arg);
+		if (err != -EAGAIN) {
+			fib_rule_get(rule);
+			arg->rule = rule;
+			goto out;
+		}
+	}
+
+	err = -ESRCH;
+out:
+	rcu_read_unlock();
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_lookup);
+
+static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
+			    struct fib_rules_ops *ops)
+{
+	int err = -EINVAL;
+
+	if (frh->src_len)
+		if (tb[FRA_SRC] == NULL ||
+		    frh->src_len > (ops->addr_size * 8) ||
+		    nla_len(tb[FRA_SRC]) != ops->addr_size)
+			goto errout;
+
+	if (frh->dst_len)
+		if (tb[FRA_DST] == NULL ||
+		    frh->dst_len > (ops->addr_size * 8) ||
+		    nla_len(tb[FRA_DST]) != ops->addr_size)
+			goto errout;
+
+	err = 0;
+errout:
+	return err;
+}
+
+int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+	struct fib_rules_ops *ops = NULL;
+	struct fib_rule *rule, *r, *last = NULL;
+	struct nlattr *tb[FRA_MAX+1];
+	int err = -EINVAL;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+		goto errout;
+
+	ops = lookup_rules_ops(frh->family);
+	if (ops == NULL) {
+		err = EAFNOSUPPORT;
+		goto errout;
+	}
+
+	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+	if (err < 0)
+		goto errout;
+
+	err = validate_rulemsg(frh, tb, ops);
+	if (err < 0)
+		goto errout;
+
+	rule = kzalloc(ops->rule_size, GFP_KERNEL);
+	if (rule == NULL) {
+		err = -ENOMEM;
+		goto errout;
+	}
+
+	if (tb[FRA_PRIORITY])
+		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
+
+	if (tb[FRA_IFNAME]) {
+		struct net_device *dev;
+
+		rule->ifindex = -1;
+		nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
+		dev = __dev_get_by_name(rule->ifname);
+		if (dev)
+			rule->ifindex = dev->ifindex;
+	}
+
+	if (tb[FRA_FWMARK]) {
+		rule->mark = nla_get_u32(tb[FRA_FWMARK]);
+		if (rule->mark)
+			/* compatibility: if the mark value is non-zero all bits
+			 * are compared unless a mask is explicitly specified.
+			 */
+			rule->mark_mask = 0xFFFFFFFF;
+	}
+
+	if (tb[FRA_FWMASK])
+		rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);
+
+	rule->action = frh->action;
+	rule->flags = frh->flags;
+	rule->table = frh_get_table(frh, tb);
+
+	if (!rule->pref && ops->default_pref)
+		rule->pref = ops->default_pref();
+
+	err = ops->configure(rule, skb, nlh, frh, tb);
+	if (err < 0)
+		goto errout_free;
+
+	list_for_each_entry(r, ops->rules_list, list) {
+		if (r->pref > rule->pref)
+			break;
+		last = r;
+	}
+
+	fib_rule_get(rule);
+
+	if (last)
+		list_add_rcu(&rule->list, &last->list);
+	else
+		list_add_rcu(&rule->list, ops->rules_list);
+
+	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+	rules_ops_put(ops);
+	return 0;
+
+errout_free:
+	kfree(rule);
+errout:
+	rules_ops_put(ops);
+	return err;
+}
+
+int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+	struct fib_rules_ops *ops = NULL;
+	struct fib_rule *rule;
+	struct nlattr *tb[FRA_MAX+1];
+	int err = -EINVAL;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+		goto errout;
+
+	ops = lookup_rules_ops(frh->family);
+	if (ops == NULL) {
+		err = EAFNOSUPPORT;
+		goto errout;
+	}
+
+	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+	if (err < 0)
+		goto errout;
+
+	err = validate_rulemsg(frh, tb, ops);
+	if (err < 0)
+		goto errout;
+
+	list_for_each_entry(rule, ops->rules_list, list) {
+		if (frh->action && (frh->action != rule->action))
+			continue;
+
+		if (frh->table && (frh_get_table(frh, tb) != rule->table))
+			continue;
+
+		if (tb[FRA_PRIORITY] &&
+		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
+			continue;
+
+		if (tb[FRA_IFNAME] &&
+		    nla_strcmp(tb[FRA_IFNAME], rule->ifname))
+			continue;
+
+		if (tb[FRA_FWMARK] &&
+		    (rule->mark != nla_get_u32(tb[FRA_FWMARK])))
+			continue;
+
+		if (tb[FRA_FWMASK] &&
+		    (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
+			continue;
+
+		if (!ops->compare(rule, frh, tb))
+			continue;
+
+		if (rule->flags & FIB_RULE_PERMANENT) {
+			err = -EPERM;
+			goto errout;
+		}
+
+		list_del_rcu(&rule->list);
+		synchronize_rcu();
+		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
+				   NETLINK_CB(skb).pid);
+		fib_rule_put(rule);
+		rules_ops_put(ops);
+		return 0;
+	}
+
+	err = -ENOENT;
+errout:
+	rules_ops_put(ops);
+	return err;
+}
+
+static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+					 struct fib_rule *rule)
+{
+	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
+			 + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */
+			 + nla_total_size(4) /* FRA_PRIORITY */
+			 + nla_total_size(4) /* FRA_TABLE */
+			 + nla_total_size(4) /* FRA_FWMARK */
+			 + nla_total_size(4); /* FRA_FWMASK */
+
+	if (ops->nlmsg_payload)
+		payload += ops->nlmsg_payload(rule);
+
+	return payload;
+}
+
+static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
+			    u32 pid, u32 seq, int type, int flags,
+			    struct fib_rules_ops *ops)
+{
+	struct nlmsghdr *nlh;
+	struct fib_rule_hdr *frh;
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	frh = nlmsg_data(nlh);
+	frh->table = rule->table;
+	NLA_PUT_U32(skb, FRA_TABLE, rule->table);
+	frh->res1 = 0;
+	frh->res2 = 0;
+	frh->action = rule->action;
+	frh->flags = rule->flags;
+
+	if (rule->ifname[0])
+		NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
+
+	if (rule->pref)
+		NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
+
+	if (rule->mark)
+		NLA_PUT_U32(skb, FRA_FWMARK, rule->mark);
+
+	if (rule->mark_mask || rule->mark)
+		NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask);
+
+	if (ops->fill(rule, skb, nlh, frh) < 0)
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
+{
+	int idx = 0;
+	struct fib_rule *rule;
+	struct fib_rules_ops *ops;
+
+	ops = lookup_rules_ops(family);
+	if (ops == NULL)
+		return -EAFNOSUPPORT;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(rule, ops->rules_list, list) {
+		if (idx < cb->args[0])
+			goto skip;
+
+		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
+				     cb->nlh->nlmsg_seq, RTM_NEWRULE,
+				     NLM_F_MULTI, ops) < 0)
+			break;
+skip:
+		idx++;
+	}
+	rcu_read_unlock();
+	cb->args[0] = idx;
+	rules_ops_put(ops);
+
+	return skb->len;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_dump);
+
+static void notify_rule_change(int event, struct fib_rule *rule,
+			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+			       u32 pid)
+{
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+	err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(ops->nlgroup, err);
+}
+
+static void attach_rules(struct list_head *rules, struct net_device *dev)
+{
+	struct fib_rule *rule;
+
+	list_for_each_entry(rule, rules, list) {
+		if (rule->ifindex == -1 &&
+		    strcmp(dev->name, rule->ifname) == 0)
+			rule->ifindex = dev->ifindex;
+	}
+}
+
+static void detach_rules(struct list_head *rules, struct net_device *dev)
+{
+	struct fib_rule *rule;
+
+	list_for_each_entry(rule, rules, list)
+		if (rule->ifindex == dev->ifindex)
+			rule->ifindex = -1;
+}
+
+
+static int fib_rules_event(struct notifier_block *this, unsigned long event,
+			    void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct fib_rules_ops *ops;
+
+	ASSERT_RTNL();
+	rcu_read_lock();
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		list_for_each_entry(ops, &rules_ops, list)
+			attach_rules(ops->rules_list, dev);
+		break;
+
+	case NETDEV_UNREGISTER:
+		list_for_each_entry(ops, &rules_ops, list)
+			detach_rules(ops->rules_list, dev);
+		break;
+	}
+
+	rcu_read_unlock();
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block fib_rules_notifier = {
+	.notifier_call = fib_rules_event,
+};
+
+static int __init fib_rules_init(void)
+{
+	return register_netdevice_notifier(&fib_rules_notifier);
+}
+
+subsys_initcall(fib_rules_init);
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -0,0 +1,437 @@
+/*
+ * Linux Socket Filter - Kernel level socket filtering
+ *
+ * Author:
+ *     Jay Schulist <jschlst@samba.org>
+ *
+ * Based on the design of:
+ *     - The Berkeley Packet Filter
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Andi Kleen - Fix a few bad bugs and races.
+ * Kris Katterjohn - Added many additional checks in sk_chk_filter()
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/fcntl.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/if_packet.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+#include <linux/filter.h>
+
+/* No hurry in this branch */
+static void *__load_pointer(struct sk_buff *skb, int k)
+{
+	u8 *ptr = NULL;
+
+	if (k >= SKF_NET_OFF)
+		ptr = skb->nh.raw + k - SKF_NET_OFF;
+	else if (k >= SKF_LL_OFF)
+		ptr = skb->mac.raw + k - SKF_LL_OFF;
+
+	if (ptr >= skb->head && ptr < skb->tail)
+		return ptr;
+	return NULL;
+}
+
+static inline void *load_pointer(struct sk_buff *skb, int k,
+				 unsigned int size, void *buffer)
+{
+	if (k >= 0)
+		return skb_header_pointer(skb, k, size, buffer);
+	else {
+		if (k >= SKF_AD_OFF)
+			return NULL;
+		return __load_pointer(skb, k);
+	}
+}
+
+/**
+ *	sk_run_filter - run a filter on a socket
+ *	@skb: buffer to run the filter on
+ *	@filter: filter to apply
+ *	@flen: length of filter
+ *
+ * Decode and apply filter instructions to the skb->data.
+ * Return length to keep, 0 for none. skb is the data we are
+ * filtering, filter is the array of filter instructions, and
+ * len is the number of filter blocks in the array.
+ */
+unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+{
+	struct sock_filter *fentry;	/* We walk down these */
+	void *ptr;
+	u32 A = 0;			/* Accumulator */
+	u32 X = 0;			/* Index Register */
+	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
+	u32 tmp;
+	int k;
+	int pc;
+
+	/*
+	 * Process array of filter instructions.
+	 */
+	for (pc = 0; pc < flen; pc++) {
+		fentry = &filter[pc];
+
+		switch (fentry->code) {
+		case BPF_ALU|BPF_ADD|BPF_X:
+			A += X;
+			continue;
+		case BPF_ALU|BPF_ADD|BPF_K:
+			A += fentry->k;
+			continue;
+		case BPF_ALU|BPF_SUB|BPF_X:
+			A -= X;
+			continue;
+		case BPF_ALU|BPF_SUB|BPF_K:
+			A -= fentry->k;
+			continue;
+		case BPF_ALU|BPF_MUL|BPF_X:
+			A *= X;
+			continue;
+		case BPF_ALU|BPF_MUL|BPF_K:
+			A *= fentry->k;
+			continue;
+		case BPF_ALU|BPF_DIV|BPF_X:
+			if (X == 0)
+				return 0;
+			A /= X;
+			continue;
+		case BPF_ALU|BPF_DIV|BPF_K:
+			A /= fentry->k;
+			continue;
+		case BPF_ALU|BPF_AND|BPF_X:
+			A &= X;
+			continue;
+		case BPF_ALU|BPF_AND|BPF_K:
+			A &= fentry->k;
+			continue;
+		case BPF_ALU|BPF_OR|BPF_X:
+			A |= X;
+			continue;
+		case BPF_ALU|BPF_OR|BPF_K:
+			A |= fentry->k;
+			continue;
+		case BPF_ALU|BPF_LSH|BPF_X:
+			A <<= X;
+			continue;
+		case BPF_ALU|BPF_LSH|BPF_K:
+			A <<= fentry->k;
+			continue;
+		case BPF_ALU|BPF_RSH|BPF_X:
+			A >>= X;
+			continue;
+		case BPF_ALU|BPF_RSH|BPF_K:
+			A >>= fentry->k;
+			continue;
+		case BPF_ALU|BPF_NEG:
+			A = -A;
+			continue;
+		case BPF_JMP|BPF_JA:
+			pc += fentry->k;
+			continue;
+		case BPF_JMP|BPF_JGT|BPF_K:
+			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JGE|BPF_K:
+			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JEQ|BPF_K:
+			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JSET|BPF_K:
+			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JGT|BPF_X:
+			pc += (A > X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JGE|BPF_X:
+			pc += (A >= X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JEQ|BPF_X:
+			pc += (A == X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_JMP|BPF_JSET|BPF_X:
+			pc += (A & X) ? fentry->jt : fentry->jf;
+			continue;
+		case BPF_LD|BPF_W|BPF_ABS:
+			k = fentry->k;
+load_w:
+			ptr = load_pointer(skb, k, 4, &tmp);
+			if (ptr != NULL) {
+				A = ntohl(get_unaligned((__be32 *)ptr));
+				continue;
+			}
+			break;
+		case BPF_LD|BPF_H|BPF_ABS:
+			k = fentry->k;
+load_h:
+			ptr = load_pointer(skb, k, 2, &tmp);
+			if (ptr != NULL) {
+				A = ntohs(get_unaligned((__be16 *)ptr));
+				continue;
+			}
+			break;
+		case BPF_LD|BPF_B|BPF_ABS:
+			k = fentry->k;
+load_b:
+			ptr = load_pointer(skb, k, 1, &tmp);
+			if (ptr != NULL) {
+				A = *(u8 *)ptr;
+				continue;
+			}
+			break;
+		case BPF_LD|BPF_W|BPF_LEN:
+			A = skb->len;
+			continue;
+		case BPF_LDX|BPF_W|BPF_LEN:
+			X = skb->len;
+			continue;
+		case BPF_LD|BPF_W|BPF_IND:
+			k = X + fentry->k;
+			goto load_w;
+		case BPF_LD|BPF_H|BPF_IND:
+			k = X + fentry->k;
+			goto load_h;
+		case BPF_LD|BPF_B|BPF_IND:
+			k = X + fentry->k;
+			goto load_b;
+		case BPF_LDX|BPF_B|BPF_MSH:
+			ptr = load_pointer(skb, fentry->k, 1, &tmp);
+			if (ptr != NULL) {
+				X = (*(u8 *)ptr & 0xf) << 2;
+				continue;
+			}
+			return 0;
+		case BPF_LD|BPF_IMM:
+			A = fentry->k;
+			continue;
+		case BPF_LDX|BPF_IMM:
+			X = fentry->k;
+			continue;
+		case BPF_LD|BPF_MEM:
+			A = mem[fentry->k];
+			continue;
+		case BPF_LDX|BPF_MEM:
+			X = mem[fentry->k];
+			continue;
+		case BPF_MISC|BPF_TAX:
+			X = A;
+			continue;
+		case BPF_MISC|BPF_TXA:
+			A = X;
+			continue;
+		case BPF_RET|BPF_K:
+			return fentry->k;
+		case BPF_RET|BPF_A:
+			return A;
+		case BPF_ST:
+			mem[fentry->k] = A;
+			continue;
+		case BPF_STX:
+			mem[fentry->k] = X;
+			continue;
+		default:
+			WARN_ON(1);
+			return 0;
+		}
+
+		/*
+		 * Handle ancillary data, which are impossible
+		 * (or very difficult) to get parsing packet contents.
+		 */
+		switch (k-SKF_AD_OFF) {
+		case SKF_AD_PROTOCOL:
+			A = ntohs(skb->protocol);
+			continue;
+		case SKF_AD_PKTTYPE:
+			A = skb->pkt_type;
+			continue;
+		case SKF_AD_IFINDEX:
+			A = skb->dev->ifindex;
+			continue;
+		default:
+			return 0;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *	sk_chk_filter - verify socket filter code
+ *	@filter: filter to verify
+ *	@flen: length of filter
+ *
+ * Check the user's filter code. If we let some ugly
+ * filter code slip through kaboom! The filter must contain
+ * no references or jumps that are out of range, no illegal
+ * instructions, and must end with a RET instruction.
+ *
+ * All jumps are forward as they are not signed.
+ *
+ * Returns 0 if the rule set is legal or -EINVAL if not.
+ */
+int sk_chk_filter(struct sock_filter *filter, int flen)
+{
+	struct sock_filter *ftest;
+	int pc;
+
+	if (flen == 0 || flen > BPF_MAXINSNS)
+		return -EINVAL;
+
+	/* check the filter code now */
+	for (pc = 0; pc < flen; pc++) {
+		ftest = &filter[pc];
+
+		/* Only allow valid instructions */
+		switch (ftest->code) {
+		case BPF_ALU|BPF_ADD|BPF_K:
+		case BPF_ALU|BPF_ADD|BPF_X:
+		case BPF_ALU|BPF_SUB|BPF_K:
+		case BPF_ALU|BPF_SUB|BPF_X:
+		case BPF_ALU|BPF_MUL|BPF_K:
+		case BPF_ALU|BPF_MUL|BPF_X:
+		case BPF_ALU|BPF_DIV|BPF_X:
+		case BPF_ALU|BPF_AND|BPF_K:
+		case BPF_ALU|BPF_AND|BPF_X:
+		case BPF_ALU|BPF_OR|BPF_K:
+		case BPF_ALU|BPF_OR|BPF_X:
+		case BPF_ALU|BPF_LSH|BPF_K:
+		case BPF_ALU|BPF_LSH|BPF_X:
+		case BPF_ALU|BPF_RSH|BPF_K:
+		case BPF_ALU|BPF_RSH|BPF_X:
+		case BPF_ALU|BPF_NEG:
+		case BPF_LD|BPF_W|BPF_ABS:
+		case BPF_LD|BPF_H|BPF_ABS:
+		case BPF_LD|BPF_B|BPF_ABS:
+		case BPF_LD|BPF_W|BPF_LEN:
+		case BPF_LD|BPF_W|BPF_IND:
+		case BPF_LD|BPF_H|BPF_IND:
+		case BPF_LD|BPF_B|BPF_IND:
+		case BPF_LD|BPF_IMM:
+		case BPF_LDX|BPF_W|BPF_LEN:
+		case BPF_LDX|BPF_B|BPF_MSH:
+		case BPF_LDX|BPF_IMM:
+		case BPF_MISC|BPF_TAX:
+		case BPF_MISC|BPF_TXA:
+		case BPF_RET|BPF_K:
+		case BPF_RET|BPF_A:
+			break;
+
+		/* Some instructions need special checks */
+
+		case BPF_ALU|BPF_DIV|BPF_K:
+			/* check for division by zero */
+			if (ftest->k == 0)
+				return -EINVAL;
+			break;
+
+		case BPF_LD|BPF_MEM:
+		case BPF_LDX|BPF_MEM:
+		case BPF_ST:
+		case BPF_STX:
+			/* check for invalid memory addresses */
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			break;
+
+		case BPF_JMP|BPF_JA:
+			/*
+			 * Note, the large ftest->k might cause loops.
+			 * Compare this with conditional jumps below,
+			 * where offsets are limited. --ANK (981016)
+			 */
+			if (ftest->k >= (unsigned)(flen-pc-1))
+				return -EINVAL;
+			break;
+
+		case BPF_JMP|BPF_JEQ|BPF_K:
+		case BPF_JMP|BPF_JEQ|BPF_X:
+		case BPF_JMP|BPF_JGE|BPF_K:
+		case BPF_JMP|BPF_JGE|BPF_X:
+		case BPF_JMP|BPF_JGT|BPF_K:
+		case BPF_JMP|BPF_JGT|BPF_X:
+		case BPF_JMP|BPF_JSET|BPF_K:
+		case BPF_JMP|BPF_JSET|BPF_X:
+			/* for conditionals both must be safe */
+			if (pc + ftest->jt + 1 >= flen ||
+			    pc + ftest->jf + 1 >= flen)
+				return -EINVAL;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	}
+
+	return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
+}
+
+/**
+ *	sk_attach_filter - attach a socket filter
+ *	@fprog: the filter program
+ *	@sk: the socket to use
+ *
+ * Attach the user's filter code. We first run some sanity checks on
+ * it to make sure it does not explode on us later. If an error
+ * occurs or there is insufficient memory for the filter a negative
+ * errno code is returned. On success the return is zero.
+ */
+int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+{
+	struct sk_filter *fp;
+	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
+	int err;
+
+	/* Make sure new filter is there and in the right amounts. */
+	if (fprog->filter == NULL)
+		return -EINVAL;
+
+	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
+	if (!fp)
+		return -ENOMEM;
+	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
+		sock_kfree_s(sk, fp, fsize+sizeof(*fp));
+		return -EFAULT;
+	}
+
+	atomic_set(&fp->refcnt, 1);
+	fp->len = fprog->len;
+
+	err = sk_chk_filter(fp->insns, fp->len);
+	if (!err) {
+		struct sk_filter *old_fp;
+
+		rcu_read_lock_bh();
+		old_fp = rcu_dereference(sk->sk_filter);
+		rcu_assign_pointer(sk->sk_filter, fp);
+		rcu_read_unlock_bh();
+		fp = old_fp;
+	}
+
+	if (fp)
+		sk_filter_release(sk, fp);
+	return err;
+}
+
+EXPORT_SYMBOL(sk_chk_filter);
+EXPORT_SYMBOL(sk_run_filter);
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -0,0 +1,373 @@
+/* flow.c: Generic flow cache.
+ *
+ * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/completion.h>
+#include <linux/percpu.h>
+#include <linux/bitops.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/mutex.h>
+#include <net/flow.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+#include <linux/security.h>
+
+struct flow_cache_entry {
+	struct flow_cache_entry	*next;
+	u16			family;
+	u8			dir;
+	struct flowi		key;
+	u32			genid;
+	void			*object;
+	atomic_t		*object_ref;
+};
+
+atomic_t flow_cache_genid = ATOMIC_INIT(0);
+
+static u32 flow_hash_shift;
+#define flow_hash_size	(1 << flow_hash_shift)
+static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
+
+#define flow_table(cpu) (per_cpu(flow_tables, cpu))
+
+static struct kmem_cache *flow_cachep __read_mostly;
+
+static int flow_lwm, flow_hwm;
+
+struct flow_percpu_info {
+	int hash_rnd_recalc;
+	u32 hash_rnd;
+	int count;
+} ____cacheline_aligned;
+static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
+
+#define flow_hash_rnd_recalc(cpu) \
+	(per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
+#define flow_hash_rnd(cpu) \
+	(per_cpu(flow_hash_info, cpu).hash_rnd)
+#define flow_count(cpu) \
+	(per_cpu(flow_hash_info, cpu).count)
+
+static struct timer_list flow_hash_rnd_timer;
+
+#define FLOW_HASH_RND_PERIOD	(10 * 60 * HZ)
+
+struct flow_flush_info {
+	atomic_t cpuleft;
+	struct completion completion;
+};
+static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
+
+#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
+
+static void flow_cache_new_hashrnd(unsigned long arg)
+{
+	int i;
+
+	for_each_possible_cpu(i)
+		flow_hash_rnd_recalc(i) = 1;
+
+	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&flow_hash_rnd_timer);
+}
+
+static void flow_entry_kill(int cpu, struct flow_cache_entry *fle)
+{
+	if (fle->object)
+		atomic_dec(fle->object_ref);
+	kmem_cache_free(flow_cachep, fle);
+	flow_count(cpu)--;
+}
+
+static void __flow_cache_shrink(int cpu, int shrink_to)
+{
+	struct flow_cache_entry *fle, **flp;
+	int i;
+
+	for (i = 0; i < flow_hash_size; i++) {
+		int k = 0;
+
+		flp = &flow_table(cpu)[i];
+		while ((fle = *flp) != NULL && k < shrink_to) {
+			k++;
+			flp = &fle->next;
+		}
+		while ((fle = *flp) != NULL) {
+			*flp = fle->next;
+			flow_entry_kill(cpu, fle);
+		}
+	}
+}
+
+static void flow_cache_shrink(int cpu)
+{
+	int shrink_to = flow_lwm / flow_hash_size;
+
+	__flow_cache_shrink(cpu, shrink_to);
+}
+
+static void flow_new_hash_rnd(int cpu)
+{
+	get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
+	flow_hash_rnd_recalc(cpu) = 0;
+
+	__flow_cache_shrink(cpu, 0);
+}
+
+static u32 flow_hash_code(struct flowi *key, int cpu)
+{
+	u32 *k = (u32 *) key;
+
+	return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
+		(flow_hash_size - 1));
+}
+
+#if (BITS_PER_LONG == 64)
+typedef u64 flow_compare_t;
+#else
+typedef u32 flow_compare_t;
+#endif
+
+extern void flowi_is_missized(void);
+
+/* I hear what you're saying, use memcmp.  But memcmp cannot make
+ * important assumptions that we can here, such as alignment and
+ * constant size.
+ */
+static int flow_key_compare(struct flowi *key1, struct flowi *key2)
+{
+	flow_compare_t *k1, *k1_lim, *k2;
+	const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
+
+	if (sizeof(struct flowi) % sizeof(flow_compare_t))
+		flowi_is_missized();
+
+	k1 = (flow_compare_t *) key1;
+	k1_lim = k1 + n_elem;
+
+	k2 = (flow_compare_t *) key2;
+
+	do {
+		if (*k1++ != *k2++)
+			return 1;
+	} while (k1 < k1_lim);
+
+	return 0;
+}
+
+void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
+			flow_resolve_t resolver)
+{
+	struct flow_cache_entry *fle, **head;
+	unsigned int hash;
+	int cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+
+	fle = NULL;
+	/* Packet really early in init?  Making flow_cache_init a
+	 * pre-smp initcall would solve this.  --RR */
+	if (!flow_table(cpu))
+		goto nocache;
+
+	if (flow_hash_rnd_recalc(cpu))
+		flow_new_hash_rnd(cpu);
+	hash = flow_hash_code(key, cpu);
+
+	head = &flow_table(cpu)[hash];
+	for (fle = *head; fle; fle = fle->next) {
+		if (fle->family == family &&
+		    fle->dir == dir &&
+		    flow_key_compare(key, &fle->key) == 0) {
+			if (fle->genid == atomic_read(&flow_cache_genid)) {
+				void *ret = fle->object;
+
+				if (ret)
+					atomic_inc(fle->object_ref);
+				local_bh_enable();
+
+				return ret;
+			}
+			break;
+		}
+	}
+
+	if (!fle) {
+		if (flow_count(cpu) > flow_hwm)
+			flow_cache_shrink(cpu);
+
+		fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
+		if (fle) {
+			fle->next = *head;
+			*head = fle;
+			fle->family = family;
+			fle->dir = dir;
+			memcpy(&fle->key, key, sizeof(*key));
+			fle->object = NULL;
+			flow_count(cpu)++;
+		}
+	}
+
+nocache:
+	{
+		int err;
+		void *obj;
+		atomic_t *obj_ref;
+
+		err = resolver(key, family, dir, &obj, &obj_ref);
+
+		if (fle && !err) {
+			fle->genid = atomic_read(&flow_cache_genid);
+
+			if (fle->object)
+				atomic_dec(fle->object_ref);
+
+			fle->object = obj;
+			fle->object_ref = obj_ref;
+			if (obj)
+				atomic_inc(fle->object_ref);
+		}
+		local_bh_enable();
+
+		if (err)
+			obj = ERR_PTR(err);
+		return obj;
+	}
+}
+
+static void flow_cache_flush_tasklet(unsigned long data)
+{
+	struct flow_flush_info *info = (void *)data;
+	int i;
+	int cpu;
+
+	cpu = smp_processor_id();
+	for (i = 0; i < flow_hash_size; i++) {
+		struct flow_cache_entry *fle;
+
+		fle = flow_table(cpu)[i];
+		for (; fle; fle = fle->next) {
+			unsigned genid = atomic_read(&flow_cache_genid);
+
+			if (!fle->object || fle->genid == genid)
+				continue;
+
+			fle->object = NULL;
+			atomic_dec(fle->object_ref);
+		}
+	}
+
+	if (atomic_dec_and_test(&info->cpuleft))
+		complete(&info->completion);
+}
+
+static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
+static void flow_cache_flush_per_cpu(void *data)
+{
+	struct flow_flush_info *info = data;
+	int cpu;
+	struct tasklet_struct *tasklet;
+
+	cpu = smp_processor_id();
+
+	tasklet = flow_flush_tasklet(cpu);
+	tasklet->data = (unsigned long)info;
+	tasklet_schedule(tasklet);
+}
+
+void flow_cache_flush(void)
+{
+	struct flow_flush_info info;
+	static DEFINE_MUTEX(flow_flush_sem);
+
+	/* Don't want cpus going down or up during this. */
+	lock_cpu_hotplug();
+	mutex_lock(&flow_flush_sem);
+	atomic_set(&info.cpuleft, num_online_cpus());
+	init_completion(&info.completion);
+
+	local_bh_disable();
+	smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0);
+	flow_cache_flush_tasklet((unsigned long)&info);
+	local_bh_enable();
+
+	wait_for_completion(&info.completion);
+	mutex_unlock(&flow_flush_sem);
+	unlock_cpu_hotplug();
+}
+
+static void __devinit flow_cache_cpu_prepare(int cpu)
+{
+	struct tasklet_struct *tasklet;
+	unsigned long order;
+
+	for (order = 0;
+	     (PAGE_SIZE << order) <
+		     (sizeof(struct flow_cache_entry *)*flow_hash_size);
+	     order++)
+		/* NOTHING */;
+
+	flow_table(cpu) = (struct flow_cache_entry **)
+		__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+	if (!flow_table(cpu))
+		panic("NET: failed to allocate flow cache order %lu\n", order);
+
+	flow_hash_rnd_recalc(cpu) = 1;
+	flow_count(cpu) = 0;
+
+	tasklet = flow_flush_tasklet(cpu);
+	tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
+}
+
+static int flow_cache_cpu(struct notifier_block *nfb,
+			  unsigned long action,
+			  void *hcpu)
+{
+	if (action == CPU_DEAD)
+		__flow_cache_shrink((unsigned long)hcpu, 0);
+	return NOTIFY_OK;
+}
+
+static int __init flow_cache_init(void)
+{
+	int i;
+
+	flow_cachep = kmem_cache_create("flow_cache",
+					sizeof(struct flow_cache_entry),
+					0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+					NULL, NULL);
+	flow_hash_shift = 10;
+	flow_lwm = 2 * flow_hash_size;
+	flow_hwm = 4 * flow_hash_size;
+
+	init_timer(&flow_hash_rnd_timer);
+	flow_hash_rnd_timer.function = flow_cache_new_hashrnd;
+	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&flow_hash_rnd_timer);
+
+	for_each_possible_cpu(i)
+		flow_cache_cpu_prepare(i);
+
+	hotcpu_notifier(flow_cache_cpu, 0);
+	return 0;
+}
+
+module_init(flow_cache_init);
+
+EXPORT_SYMBOL(flow_cache_genid);
+EXPORT_SYMBOL(flow_cache_lookup);
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -0,0 +1,249 @@
+/*
+ * net/sched/gen_estimator.c	Simple rate estimator.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ *              Jamal Hadi Salim - moved it to net/core and reshulfed
+ *              names to make it usable in general net subsystem.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <net/sock.h>
+#include <net/gen_stats.h>
+
+/*
+   This code is NOT intended to be used for statistics collection,
+   its purpose is to provide a base for statistical multiplexing
+   for controlled load service.
+   If you need only statistics, run a user level daemon which
+   periodically reads byte counters.
+
+   Unfortunately, rate estimation is not a very easy task.
+   F.e. I did not find a simple way to estimate the current peak rate
+   and even failed to formulate the problem 8)8)
+
+   So I preferred not to built an estimator into the scheduler,
+   but run this task separately.
+   Ideally, it should be kernel thread(s), but for now it runs
+   from timers, which puts apparent top bounds on the number of rated
+   flows, has minimal overhead on small, but is enough
+   to handle controlled load service, sets of aggregates.
+
+   We measure rate over A=(1<<interval) seconds and evaluate EWMA:
+
+   avrate = avrate*(1-W) + rate*W
+
+   where W is chosen as negative power of 2: W = 2^(-ewma_log)
+
+   The resulting time constant is:
+
+   T = A/(-ln(1-W))
+
+
+   NOTES.
+
+   * The stored value for avbps is scaled by 2^5, so that maximal
+     rate is ~1Gbit, avpps is scaled by 2^10.
+
+   * Minimal interval is HZ/4=250msec (it is the greatest common divisor
+     for HZ=100 and HZ=1024 8)), maximal interval
+     is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
+     are too expensive, longer ones can be implemented
+     at user level painlessly.
+ */
+
+#define EST_MAX_INTERVAL	5
+
+struct gen_estimator
+{
+	struct gen_estimator	*next;
+	struct gnet_stats_basic	*bstats;
+	struct gnet_stats_rate_est	*rate_est;
+	spinlock_t		*stats_lock;
+	unsigned		interval;
+	int			ewma_log;
+	u64			last_bytes;
+	u32			last_packets;
+	u32			avpps;
+	u32			avbps;
+};
+
+struct gen_estimator_head
+{
+	struct timer_list	timer;
+	struct gen_estimator	*list;
+};
+
+static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
+
+/* Estimator array lock */
+static DEFINE_RWLOCK(est_lock);
+
+static void est_timer(unsigned long arg)
+{
+	int idx = (int)arg;
+	struct gen_estimator *e;
+
+	read_lock(&est_lock);
+	for (e = elist[idx].list; e; e = e->next) {
+		u64 nbytes;
+		u32 npackets;
+		u32 rate;
+
+		spin_lock(e->stats_lock);
+		nbytes = e->bstats->bytes;
+		npackets = e->bstats->packets;
+		rate = (nbytes - e->last_bytes)<<(7 - idx);
+		e->last_bytes = nbytes;
+		e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
+		e->rate_est->bps = (e->avbps+0xF)>>5;
+
+		rate = (npackets - e->last_packets)<<(12 - idx);
+		e->last_packets = npackets;
+		e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
+		e->rate_est->pps = (e->avpps+0x1FF)>>10;
+		spin_unlock(e->stats_lock);
+	}
+
+	mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+	read_unlock(&est_lock);
+}
+
+/**
+ * gen_new_estimator - create a new rate estimator
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ * @stats_lock: statistics lock
+ * @opt: rate estimator configuration TLV
+ *
+ * Creates a new rate estimator with &bstats as source and &rate_est
+ * as destination. A new timer with the interval specified in the
+ * configuration TLV is created. Upon each interval, the latest statistics
+ * will be read from &bstats and the estimated rate will be stored in
+ * &rate_est with the statistics lock grabed during this period.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int gen_new_estimator(struct gnet_stats_basic *bstats,
+	struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt)
+{
+	struct gen_estimator *est;
+	struct gnet_estimator *parm = RTA_DATA(opt);
+
+	if (RTA_PAYLOAD(opt) < sizeof(*parm))
+		return -EINVAL;
+
+	if (parm->interval < -2 || parm->interval > 3)
+		return -EINVAL;
+
+	est = kzalloc(sizeof(*est), GFP_KERNEL);
+	if (est == NULL)
+		return -ENOBUFS;
+
+	est->interval = parm->interval + 2;
+	est->bstats = bstats;
+	est->rate_est = rate_est;
+	est->stats_lock = stats_lock;
+	est->ewma_log = parm->ewma_log;
+	est->last_bytes = bstats->bytes;
+	est->avbps = rate_est->bps<<5;
+	est->last_packets = bstats->packets;
+	est->avpps = rate_est->pps<<10;
+
+	est->next = elist[est->interval].list;
+	if (est->next == NULL) {
+		init_timer(&elist[est->interval].timer);
+		elist[est->interval].timer.data = est->interval;
+		elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
+		elist[est->interval].timer.function = est_timer;
+		add_timer(&elist[est->interval].timer);
+	}
+	write_lock_bh(&est_lock);
+	elist[est->interval].list = est;
+	write_unlock_bh(&est_lock);
+	return 0;
+}
+
+/**
+ * gen_kill_estimator - remove a rate estimator
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ *
+ * Removes the rate estimator specified by &bstats and &rate_est
+ * and deletes the timer.
+ */
+void gen_kill_estimator(struct gnet_stats_basic *bstats,
+	struct gnet_stats_rate_est *rate_est)
+{
+	int idx;
+	struct gen_estimator *est, **pest;
+
+	for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
+		int killed = 0;
+		pest = &elist[idx].list;
+		while ((est=*pest) != NULL) {
+			if (est->rate_est != rate_est || est->bstats != bstats) {
+				pest = &est->next;
+				continue;
+			}
+
+			write_lock_bh(&est_lock);
+			*pest = est->next;
+			write_unlock_bh(&est_lock);
+
+			kfree(est);
+			killed++;
+		}
+		if (killed && elist[idx].list == NULL)
+			del_timer(&elist[idx].timer);
+	}
+}
+
+/**
+ * gen_replace_estimator - replace rate estimator configruation
+ * @bstats: basic statistics
+ * @rate_est: rate estimator statistics
+ * @stats_lock: statistics lock
+ * @opt: rate estimator configuration TLV
+ *
+ * Replaces the configuration of a rate estimator by calling
+ * gen_kill_estimator() and gen_new_estimator().
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int
+gen_replace_estimator(struct gnet_stats_basic *bstats,
+	struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock,
+	struct rtattr *opt)
+{
+    gen_kill_estimator(bstats, rate_est);
+    return gen_new_estimator(bstats, rate_est, stats_lock, opt);
+}
+
+
+EXPORT_SYMBOL(gen_kill_estimator);
+EXPORT_SYMBOL(gen_new_estimator);
+EXPORT_SYMBOL(gen_replace_estimator);
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -0,0 +1,239 @@
+/*
+ * net/core/gen_stats.c
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:  Thomas Graf <tgraf@suug.ch>
+ *           Jamal Hadi Salim
+ *           Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * See Documentation/networking/gen_stats.txt
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/socket.h>
+#include <linux/rtnetlink.h>
+#include <linux/gen_stats.h>
+#include <net/gen_stats.h>
+
+
+static inline int
+gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
+{
+	RTA_PUT(d->skb, type, size, buf);
+	return 0;
+
+rtattr_failure:
+	spin_unlock_bh(d->lock);
+	return -1;
+}
+
+/**
+ * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
+ * @skb: socket buffer to put statistics TLVs into
+ * @type: TLV type for top level statistic TLV
+ * @tc_stats_type: TLV type for backward compatibility struct tc_stats TLV
+ * @xstats_type: TLV type for backward compatibility xstats TLV
+ * @lock: statistics lock
+ * @d: dumping handle
+ *
+ * Initializes the dumping handle, grabs the statistic lock and appends
+ * an empty TLV header to the socket buffer for use a container for all
+ * other statistic TLVS.
+ *
+ * The dumping handle is marked to be in backward compatibility mode telling
+ * all gnet_stats_copy_XXX() functions to fill a local copy of struct tc_stats.
+ *
+ * Returns 0 on success or -1 if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
+	int xstats_type, spinlock_t *lock, struct gnet_dump *d)
+{
+	memset(d, 0, sizeof(*d));
+
+	spin_lock_bh(lock);
+	d->lock = lock;
+	if (type)
+		d->tail = (struct rtattr *) skb->tail;
+	d->skb = skb;
+	d->compat_tc_stats = tc_stats_type;
+	d->compat_xstats = xstats_type;
+
+	if (d->tail)
+		return gnet_stats_copy(d, type, NULL, 0);
+
+	return 0;
+}
+
+/**
+ * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
+ * @skb: socket buffer to put statistics TLVs into
+ * @type: TLV type for top level statistic TLV
+ * @lock: statistics lock
+ * @d: dumping handle
+ *
+ * Initializes the dumping handle, grabs the statistic lock and appends
+ * an empty TLV header to the socket buffer for use a container for all
+ * other statistic TLVS.
+ *
+ * Returns 0 on success or -1 if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
+	struct gnet_dump *d)
+{
+	return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
+}
+
+/**
+ * gnet_stats_copy_basic - copy basic statistics into statistic TLV
+ * @d: dumping handle
+ * @b: basic statistics
+ *
+ * Appends the basic statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b)
+{
+	if (d->compat_tc_stats) {
+		d->tc_stats.bytes = b->bytes;
+		d->tc_stats.packets = b->packets;
+	}
+
+	if (d->tail)
+		return gnet_stats_copy(d, TCA_STATS_BASIC, b, sizeof(*b));
+
+	return 0;
+}
+
+/**
+ * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
+ * @d: dumping handle
+ * @r: rate estimator statistics
+ *
+ * Appends the rate estimator statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r)
+{
+	if (d->compat_tc_stats) {
+		d->tc_stats.bps = r->bps;
+		d->tc_stats.pps = r->pps;
+	}
+
+	if (d->tail)
+		return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r));
+
+	return 0;
+}
+
+/**
+ * gnet_stats_copy_queue - copy queue statistics into statistics TLV
+ * @d: dumping handle
+ * @q: queue statistics
+ *
+ * Appends the queue statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
+{
+	if (d->compat_tc_stats) {
+		d->tc_stats.drops = q->drops;
+		d->tc_stats.qlen = q->qlen;
+		d->tc_stats.backlog = q->backlog;
+		d->tc_stats.overlimits = q->overlimits;
+	}
+
+	if (d->tail)
+		return gnet_stats_copy(d, TCA_STATS_QUEUE, q, sizeof(*q));
+
+	return 0;
+}
+
+/**
+ * gnet_stats_copy_app - copy application specific statistics into statistics TLV
+ * @d: dumping handle
+ * @st: application specific statistics data
+ * @len: length of data
+ *
+ * Appends the application sepecific statistics to the top level TLV created by
+ * gnet_stats_start_copy() and remembers the data for XSTATS if the dumping
+ * handle is in backward compatibility mode.
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
+{
+	if (d->compat_xstats) {
+		d->xstats = st;
+		d->xstats_len = len;
+	}
+
+	if (d->tail)
+		return gnet_stats_copy(d, TCA_STATS_APP, st, len);
+
+	return 0;
+}
+
+/**
+ * gnet_stats_finish_copy - finish dumping procedure
+ * @d: dumping handle
+ *
+ * Corrects the length of the top level TLV to include all TLVs added
+ * by gnet_stats_copy_XXX() calls. Adds the backward compatibility TLVs
+ * if gnet_stats_start_copy_compat() was used and releases the statistics
+ * lock.
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_finish_copy(struct gnet_dump *d)
+{
+	if (d->tail)
+		d->tail->rta_len = d->skb->tail - (u8 *) d->tail;
+
+	if (d->compat_tc_stats)
+		if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
+			sizeof(d->tc_stats)) < 0)
+			return -1;
+
+	if (d->compat_xstats && d->xstats) {
+		if (gnet_stats_copy(d, d->compat_xstats, d->xstats,
+			d->xstats_len) < 0)
+			return -1;
+	}
+
+	spin_unlock_bh(d->lock);
+	return 0;
+}
+
+
+EXPORT_SYMBOL(gnet_stats_start_copy);
+EXPORT_SYMBOL(gnet_stats_start_copy_compat);
+EXPORT_SYMBOL(gnet_stats_copy_basic);
+EXPORT_SYMBOL(gnet_stats_copy_rate_est);
+EXPORT_SYMBOL(gnet_stats_copy_queue);
+EXPORT_SYMBOL(gnet_stats_copy_app);
+EXPORT_SYMBOL(gnet_stats_finish_copy);
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -0,0 +1,238 @@
+/*
+ *	iovec manipulation routines.
+ *
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ *		Andrew Lunn	:	Errors in iovec copying.
+ *		Pedro Roque	:	Added memcpy_fromiovecend and
+ *					csum_..._fromiovecend.
+ *		Andi Kleen	:	fixed error handling for 2.1
+ *		Alexey Kuznetsov:	2.1 optimisations
+ *		Andi Kleen	:	Fix csum*fromiovecend for IPv6.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+#include <net/checksum.h>
+#include <net/sock.h>
+
+/*
+ *	Verify iovec. The caller must ensure that the iovec is big enough
+ *	to hold the message iovec.
+ *
+ *	Save time not doing access_ok. copy_*_user will make this work
+ *	in any case.
+ */
+
+int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
+{
+	int size, err, ct;
+
+	if (m->msg_namelen) {
+		if (mode == VERIFY_READ) {
+			err = move_addr_to_kernel(m->msg_name, m->msg_namelen,
+						  address);
+			if (err < 0)
+				return err;
+		}
+		m->msg_name = address;
+	} else {
+		m->msg_name = NULL;
+	}
+
+	size = m->msg_iovlen * sizeof(struct iovec);
+	if (copy_from_user(iov, m->msg_iov, size))
+		return -EFAULT;
+
+	m->msg_iov = iov;
+	err = 0;
+
+	for (ct = 0; ct < m->msg_iovlen; ct++) {
+		err += iov[ct].iov_len;
+		/*
+		 * Goal is not to verify user data, but to prevent returning
+		 * negative value, which is interpreted as errno.
+		 * Overflow is still possible, but it is harmless.
+		 */
+		if (err < 0)
+			return -EMSGSIZE;
+	}
+
+	return err;
+}
+
+/*
+ *	Copy kernel to iovec. Returns -EFAULT on error.
+ *
+ *	Note: this modifies the original iovec.
+ */
+
+int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
+{
+	while (len > 0) {
+		if (iov->iov_len) {
+			int copy = min_t(unsigned int, iov->iov_len, len);
+			if (copy_to_user(iov->iov_base, kdata, copy))
+				return -EFAULT;
+			kdata += copy;
+			len -= copy;
+			iov->iov_len -= copy;
+			iov->iov_base += copy;
+		}
+		iov++;
+	}
+
+	return 0;
+}
+
+/*
+ *	Copy iovec to kernel. Returns -EFAULT on error.
+ *
+ *	Note: this modifies the original iovec.
+ */
+
+int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
+{
+	while (len > 0) {
+		if (iov->iov_len) {
+			int copy = min_t(unsigned int, len, iov->iov_len);
+			if (copy_from_user(kdata, iov->iov_base, copy))
+				return -EFAULT;
+			len -= copy;
+			kdata += copy;
+			iov->iov_base += copy;
+			iov->iov_len -= copy;
+		}
+		iov++;
+	}
+
+	return 0;
+}
+
+/*
+ *	For use with ip_build_xmit
+ */
+int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset,
+			int len)
+{
+	/* Skip over the finished iovecs */
+	while (offset >= iov->iov_len) {
+		offset -= iov->iov_len;
+		iov++;
+	}
+
+	while (len > 0) {
+		u8 __user *base = iov->iov_base + offset;
+		int copy = min_t(unsigned int, len, iov->iov_len - offset);
+
+		offset = 0;
+		if (copy_from_user(kdata, base, copy))
+			return -EFAULT;
+		len -= copy;
+		kdata += copy;
+		iov++;
+	}
+
+	return 0;
+}
+
+/*
+ *	And now for the all-in-one: copy and checksum from a user iovec
+ *	directly to a datagram
+ *	Calls to csum_partial but the last must be in 32 bit chunks
+ *
+ *	ip_build_xmit must ensure that when fragmenting only the last
+ *	call to this function will be unaligned also.
+ */
+int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
+				 int offset, unsigned int len, __wsum *csump)
+{
+	__wsum csum = *csump;
+	int partial_cnt = 0, err = 0;
+
+	/* Skip over the finished iovecs */
+	while (offset >= iov->iov_len) {
+		offset -= iov->iov_len;
+		iov++;
+	}
+
+	while (len > 0) {
+		u8 __user *base = iov->iov_base + offset;
+		int copy = min_t(unsigned int, len, iov->iov_len - offset);
+
+		offset = 0;
+
+		/* There is a remnant from previous iov. */
+		if (partial_cnt) {
+			int par_len = 4 - partial_cnt;
+
+			/* iov component is too short ... */
+			if (par_len > copy) {
+				if (copy_from_user(kdata, base, copy))
+					goto out_fault;
+				kdata += copy;
+				base += copy;
+				partial_cnt += copy;
+				len -= copy;
+				iov++;
+				if (len)
+					continue;
+				*csump = csum_partial(kdata - partial_cnt,
+							 partial_cnt, csum);
+				goto out;
+			}
+			if (copy_from_user(kdata, base, par_len))
+				goto out_fault;
+			csum = csum_partial(kdata - partial_cnt, 4, csum);
+			kdata += par_len;
+			base  += par_len;
+			copy  -= par_len;
+			len   -= par_len;
+			partial_cnt = 0;
+		}
+
+		if (len > copy) {
+			partial_cnt = copy % 4;
+			if (partial_cnt) {
+				copy -= partial_cnt;
+				if (copy_from_user(kdata + copy, base + copy,
+						partial_cnt))
+					goto out_fault;
+			}
+		}
+
+		if (copy) {
+			csum = csum_and_copy_from_user(base, kdata, copy,
+							csum, &err);
+			if (err)
+				goto out;
+		}
+		len   -= copy + partial_cnt;
+		kdata += copy + partial_cnt;
+		iov++;
+	}
+	*csump = csum;
+out:
+	return err;
+
+out_fault:
+	err = -EFAULT;
+	goto out;
+}
+
+EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
+EXPORT_SYMBOL(memcpy_fromiovec);
+EXPORT_SYMBOL(memcpy_fromiovecend);
+EXPORT_SYMBOL(memcpy_toiovec);
--- a/net/core/kmap_skb.h
+++ b/net/core/kmap_skb.h
@@ -0,0 +1,19 @@
+#include <linux/highmem.h>
+
+static inline void *kmap_skb_frag(const skb_frag_t *frag)
+{
+#ifdef CONFIG_HIGHMEM
+	BUG_ON(in_irq());
+
+	local_bh_disable();
+#endif
+	return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
+}
+
+static inline void kunmap_skb_frag(void *vaddr)
+{
+	kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
+#ifdef CONFIG_HIGHMEM
+	local_bh_enable();
+#endif
+}
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -0,0 +1,181 @@
+/*
+ * Linux network device link state notification
+ *
+ * Author:
+ *     Stefan Rompf <sux@loplof.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <linux/jiffies.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/bitops.h>
+#include <asm/types.h>
+
+
+enum lw_bits {
+	LW_RUNNING = 0,
+	LW_SE_USED
+};
+
+static unsigned long linkwatch_flags;
+static unsigned long linkwatch_nextevent;
+
+static void linkwatch_event(struct work_struct *dummy);
+static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
+
+static LIST_HEAD(lweventlist);
+static DEFINE_SPINLOCK(lweventlist_lock);
+
+struct lw_event {
+	struct list_head list;
+	struct net_device *dev;
+};
+
+/* Avoid kmalloc() for most systems */
+static struct lw_event singleevent;
+
+static unsigned char default_operstate(const struct net_device *dev)
+{
+	if (!netif_carrier_ok(dev))
+		return (dev->ifindex != dev->iflink ?
+			IF_OPER_LOWERLAYERDOWN : IF_OPER_DOWN);
+
+	if (netif_dormant(dev))
+		return IF_OPER_DORMANT;
+
+	return IF_OPER_UP;
+}
+
+
+static void rfc2863_policy(struct net_device *dev)
+{
+	unsigned char operstate = default_operstate(dev);
+
+	if (operstate == dev->operstate)
+		return;
+
+	write_lock_bh(&dev_base_lock);
+
+	switch(dev->link_mode) {
+	case IF_LINK_MODE_DORMANT:
+		if (operstate == IF_OPER_UP)
+			operstate = IF_OPER_DORMANT;
+		break;
+
+	case IF_LINK_MODE_DEFAULT:
+	default:
+		break;
+	};
+
+	dev->operstate = operstate;
+
+	write_unlock_bh(&dev_base_lock);
+}
+
+
+/* Must be called with the rtnl semaphore held */
+void linkwatch_run_queue(void)
+{
+	struct list_head head, *n, *next;
+
+	spin_lock_irq(&lweventlist_lock);
+	list_replace_init(&lweventlist, &head);
+	spin_unlock_irq(&lweventlist_lock);
+
+	list_for_each_safe(n, next, &head) {
+		struct lw_event *event = list_entry(n, struct lw_event, list);
+		struct net_device *dev = event->dev;
+
+		if (event == &singleevent) {
+			clear_bit(LW_SE_USED, &linkwatch_flags);
+		} else {
+			kfree(event);
+		}
+
+		/* We are about to handle this device,
+		 * so new events can be accepted
+		 */
+		clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
+
+		rfc2863_policy(dev);
+		if (dev->flags & IFF_UP) {
+			if (netif_carrier_ok(dev)) {
+				WARN_ON(dev->qdisc_sleeping == &noop_qdisc);
+				dev_activate(dev);
+			} else
+				dev_deactivate(dev);
+
+			netdev_state_change(dev);
+		}
+
+		dev_put(dev);
+	}
+}
+
+
+static void linkwatch_event(struct work_struct *dummy)
+{
+	/* Limit the number of linkwatch events to one
+	 * per second so that a runaway driver does not
+	 * cause a storm of messages on the netlink
+	 * socket
+	 */
+	linkwatch_nextevent = jiffies + HZ;
+	clear_bit(LW_RUNNING, &linkwatch_flags);
+
+	rtnl_lock();
+	linkwatch_run_queue();
+	rtnl_unlock();
+}
+
+
+void linkwatch_fire_event(struct net_device *dev)
+{
+	if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
+		unsigned long flags;
+		struct lw_event *event;
+
+		if (test_and_set_bit(LW_SE_USED, &linkwatch_flags)) {
+			event = kmalloc(sizeof(struct lw_event), GFP_ATOMIC);
+
+			if (unlikely(event == NULL)) {
+				clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
+				return;
+			}
+		} else {
+			event = &singleevent;
+		}
+
+		dev_hold(dev);
+		event->dev = dev;
+
+		spin_lock_irqsave(&lweventlist_lock, flags);
+		list_add_tail(&event->list, &lweventlist);
+		spin_unlock_irqrestore(&lweventlist_lock, flags);
+
+		if (!test_and_set_bit(LW_RUNNING, &linkwatch_flags)) {
+			unsigned long delay = linkwatch_nextevent - jiffies;
+
+			/* If we wrap around we'll delay it by at most HZ. */
+			if (delay > HZ)
+				delay = 0;
+			schedule_delayed_work(&linkwatch_work, delay);
+		}
+	}
+}
+
+EXPORT_SYMBOL(linkwatch_fire_event);
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -0,0 +1,493 @@
+/*
+ * net-sysfs.c - network device class and attributes
+ *
+ * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <net/sock.h>
+#include <linux/rtnetlink.h>
+#include <linux/wireless.h>
+#include <net/iw_handler.h>
+
+static const char fmt_hex[] = "%#x\n";
+static const char fmt_long_hex[] = "%#lx\n";
+static const char fmt_dec[] = "%d\n";
+static const char fmt_ulong[] = "%lu\n";
+
+static inline int dev_isalive(const struct net_device *dev)
+{
+	return dev->reg_state <= NETREG_REGISTERED;
+}
+
+/* use same locking rules as GIF* ioctl's */
+static ssize_t netdev_show(const struct device *dev,
+			   struct device_attribute *attr, char *buf,
+			   ssize_t (*format)(const struct net_device *, char *))
+{
+	struct net_device *net = to_net_dev(dev);
+	ssize_t ret = -EINVAL;
+
+	read_lock(&dev_base_lock);
+	if (dev_isalive(net))
+		ret = (*format)(net, buf);
+	read_unlock(&dev_base_lock);
+
+	return ret;
+}
+
+/* generate a show function for simple field */
+#define NETDEVICE_SHOW(field, format_string)				\
+static ssize_t format_##field(const struct net_device *net, char *buf)	\
+{									\
+	return sprintf(buf, format_string, net->field);			\
+}									\
+static ssize_t show_##field(struct device *dev,				\
+			    struct device_attribute *attr, char *buf)	\
+{									\
+	return netdev_show(dev, attr, buf, format_##field);		\
+}
+
+
+/* use same locking and permission rules as SIF* ioctl's */
+static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t len,
+			    int (*set)(struct net_device *, unsigned long))
+{
+	struct net_device *net = to_net_dev(dev);
+	char *endp;
+	unsigned long new;
+	int ret = -EINVAL;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	new = simple_strtoul(buf, &endp, 0);
+	if (endp == buf)
+		goto err;
+
+	rtnl_lock();
+	if (dev_isalive(net)) {
+		if ((ret = (*set)(net, new)) == 0)
+			ret = len;
+	}
+	rtnl_unlock();
+ err:
+	return ret;
+}
+
+NETDEVICE_SHOW(addr_len, fmt_dec);
+NETDEVICE_SHOW(iflink, fmt_dec);
+NETDEVICE_SHOW(ifindex, fmt_dec);
+NETDEVICE_SHOW(features, fmt_long_hex);
+NETDEVICE_SHOW(type, fmt_dec);
+NETDEVICE_SHOW(link_mode, fmt_dec);
+
+/* use same locking rules as GIFHWADDR ioctl's */
+static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
+{
+	int i;
+	char *cp = buf;
+
+	for (i = 0; i < len; i++)
+		cp += sprintf(cp, "%02x%c", addr[i],
+			      i == (len - 1) ? '\n' : ':');
+	return cp - buf;
+}
+
+static ssize_t show_address(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct net_device *net = to_net_dev(dev);
+	ssize_t ret = -EINVAL;
+
+	read_lock(&dev_base_lock);
+	if (dev_isalive(net))
+	    ret = format_addr(buf, net->dev_addr, net->addr_len);
+	read_unlock(&dev_base_lock);
+	return ret;
+}
+
+static ssize_t show_broadcast(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct net_device *net = to_net_dev(dev);
+	if (dev_isalive(net))
+		return format_addr(buf, net->broadcast, net->addr_len);
+	return -EINVAL;
+}
+
+static ssize_t show_carrier(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	if (netif_running(netdev)) {
+		return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev));
+	}
+	return -EINVAL;
+}
+
+static ssize_t show_dormant(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+
+	if (netif_running(netdev))
+		return sprintf(buf, fmt_dec, !!netif_dormant(netdev));
+
+	return -EINVAL;
+}
+
+static const char *operstates[] = {
+	"unknown",
+	"notpresent", /* currently unused */
+	"down",
+	"lowerlayerdown",
+	"testing", /* currently unused */
+	"dormant",
+	"up"
+};
+
+static ssize_t show_operstate(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	const struct net_device *netdev = to_net_dev(dev);
+	unsigned char operstate;
+
+	read_lock(&dev_base_lock);
+	operstate = netdev->operstate;
+	if (!netif_running(netdev))
+		operstate = IF_OPER_DOWN;
+	read_unlock(&dev_base_lock);
+
+	if (operstate >= ARRAY_SIZE(operstates))
+		return -EINVAL; /* should not happen */
+
+	return sprintf(buf, "%s\n", operstates[operstate]);
+}
+
+/* read-write attributes */
+NETDEVICE_SHOW(mtu, fmt_dec);
+
+static int change_mtu(struct net_device *net, unsigned long new_mtu)
+{
+	return dev_set_mtu(net, (int) new_mtu);
+}
+
+static ssize_t store_mtu(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t len)
+{
+	return netdev_store(dev, attr, buf, len, change_mtu);
+}
+
+NETDEVICE_SHOW(flags, fmt_hex);
+
+static int change_flags(struct net_device *net, unsigned long new_flags)
+{
+	return dev_change_flags(net, (unsigned) new_flags);
+}
+
+static ssize_t store_flags(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t len)
+{
+	return netdev_store(dev, attr, buf, len, change_flags);
+}
+
+NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
+
+static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
+{
+	net->tx_queue_len = new_len;
+	return 0;
+}
+
+static ssize_t store_tx_queue_len(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t len)
+{
+	return netdev_store(dev, attr, buf, len, change_tx_queue_len);
+}
+
+NETDEVICE_SHOW(weight, fmt_dec);
+
+static int change_weight(struct net_device *net, unsigned long new_weight)
+{
+	net->weight = new_weight;
+	return 0;
+}
+
+static ssize_t store_weight(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t len)
+{
+	return netdev_store(dev, attr, buf, len, change_weight);
+}
+
+static struct device_attribute net_class_attributes[] = {
+	__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
+	__ATTR(iflink, S_IRUGO, show_iflink, NULL),
+	__ATTR(ifindex, S_IRUGO, show_ifindex, NULL),
+	__ATTR(features, S_IRUGO, show_features, NULL),
+	__ATTR(type, S_IRUGO, show_type, NULL),
+	__ATTR(link_mode, S_IRUGO, show_link_mode, NULL),
+	__ATTR(address, S_IRUGO, show_address, NULL),
+	__ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
+	__ATTR(carrier, S_IRUGO, show_carrier, NULL),
+	__ATTR(dormant, S_IRUGO, show_dormant, NULL),
+	__ATTR(operstate, S_IRUGO, show_operstate, NULL),
+	__ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu),
+	__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
+	__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
+	       store_tx_queue_len),
+	__ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight),
+	{}
+};
+
+/* Show a given an attribute in the statistics group */
+static ssize_t netstat_show(const struct device *d,
+			    struct device_attribute *attr, char *buf,
+			    unsigned long offset)
+{
+	struct net_device *dev = to_net_dev(d);
+	struct net_device_stats *stats;
+	ssize_t ret = -EINVAL;
+
+	if (offset > sizeof(struct net_device_stats) ||
+	    offset % sizeof(unsigned long) != 0)
+		WARN_ON(1);
+
+	read_lock(&dev_base_lock);
+	if (dev_isalive(dev) && dev->get_stats &&
+	    (stats = (*dev->get_stats)(dev)))
+		ret = sprintf(buf, fmt_ulong,
+			      *(unsigned long *)(((u8 *) stats) + offset));
+
+	read_unlock(&dev_base_lock);
+	return ret;
+}
+
+/* generate a read-only statistics attribute */
+#define NETSTAT_ENTRY(name)						\
+static ssize_t show_##name(struct device *d,				\
+			   struct device_attribute *attr, char *buf) 	\
+{									\
+	return netstat_show(d, attr, buf,				\
+			    offsetof(struct net_device_stats, name));	\
+}									\
+static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
+NETSTAT_ENTRY(rx_packets);
+NETSTAT_ENTRY(tx_packets);
+NETSTAT_ENTRY(rx_bytes);
+NETSTAT_ENTRY(tx_bytes);
+NETSTAT_ENTRY(rx_errors);
+NETSTAT_ENTRY(tx_errors);
+NETSTAT_ENTRY(rx_dropped);
+NETSTAT_ENTRY(tx_dropped);
+NETSTAT_ENTRY(multicast);
+NETSTAT_ENTRY(collisions);
+NETSTAT_ENTRY(rx_length_errors);
+NETSTAT_ENTRY(rx_over_errors);
+NETSTAT_ENTRY(rx_crc_errors);
+NETSTAT_ENTRY(rx_frame_errors);
+NETSTAT_ENTRY(rx_fifo_errors);
+NETSTAT_ENTRY(rx_missed_errors);
+NETSTAT_ENTRY(tx_aborted_errors);
+NETSTAT_ENTRY(tx_carrier_errors);
+NETSTAT_ENTRY(tx_fifo_errors);
+NETSTAT_ENTRY(tx_heartbeat_errors);
+NETSTAT_ENTRY(tx_window_errors);
+NETSTAT_ENTRY(rx_compressed);
+NETSTAT_ENTRY(tx_compressed);
+
+static struct attribute *netstat_attrs[] = {
+	&dev_attr_rx_packets.attr,
+	&dev_attr_tx_packets.attr,
+	&dev_attr_rx_bytes.attr,
+	&dev_attr_tx_bytes.attr,
+	&dev_attr_rx_errors.attr,
+	&dev_attr_tx_errors.attr,
+	&dev_attr_rx_dropped.attr,
+	&dev_attr_tx_dropped.attr,
+	&dev_attr_multicast.attr,
+	&dev_attr_collisions.attr,
+	&dev_attr_rx_length_errors.attr,
+	&dev_attr_rx_over_errors.attr,
+	&dev_attr_rx_crc_errors.attr,
+	&dev_attr_rx_frame_errors.attr,
+	&dev_attr_rx_fifo_errors.attr,
+	&dev_attr_rx_missed_errors.attr,
+	&dev_attr_tx_aborted_errors.attr,
+	&dev_attr_tx_carrier_errors.attr,
+	&dev_attr_tx_fifo_errors.attr,
+	&dev_attr_tx_heartbeat_errors.attr,
+	&dev_attr_tx_window_errors.attr,
+	&dev_attr_rx_compressed.attr,
+	&dev_attr_tx_compressed.attr,
+	NULL
+};
+
+
+static struct attribute_group netstat_group = {
+	.name  = "statistics",
+	.attrs  = netstat_attrs,
+};
+
+#ifdef CONFIG_WIRELESS_EXT
+/* helper function that does all the locking etc for wireless stats */
+static ssize_t wireless_show(struct device *d, char *buf,
+			     ssize_t (*format)(const struct iw_statistics *,
+					       char *))
+{
+	struct net_device *dev = to_net_dev(d);
+	const struct iw_statistics *iw = NULL;
+	ssize_t ret = -EINVAL;
+
+	read_lock(&dev_base_lock);
+	if (dev_isalive(dev)) {
+		if(dev->wireless_handlers &&
+		   dev->wireless_handlers->get_wireless_stats)
+			iw = dev->wireless_handlers->get_wireless_stats(dev);
+		if (iw != NULL)
+			ret = (*format)(iw, buf);
+	}
+	read_unlock(&dev_base_lock);
+
+	return ret;
+}
+
+/* show function template for wireless fields */
+#define WIRELESS_SHOW(name, field, format_string)			\
+static ssize_t format_iw_##name(const struct iw_statistics *iw, char *buf) \
+{									\
+	return sprintf(buf, format_string, iw->field);			\
+}									\
+static ssize_t show_iw_##name(struct device *d,				\
+			      struct device_attribute *attr, char *buf)	\
+{									\
+	return wireless_show(d, buf, format_iw_##name);			\
+}									\
+static DEVICE_ATTR(name, S_IRUGO, show_iw_##name, NULL)
+
+WIRELESS_SHOW(status, status, fmt_hex);
+WIRELESS_SHOW(link, qual.qual, fmt_dec);
+WIRELESS_SHOW(level, qual.level, fmt_dec);
+WIRELESS_SHOW(noise, qual.noise, fmt_dec);
+WIRELESS_SHOW(nwid, discard.nwid, fmt_dec);
+WIRELESS_SHOW(crypt, discard.code, fmt_dec);
+WIRELESS_SHOW(fragment, discard.fragment, fmt_dec);
+WIRELESS_SHOW(misc, discard.misc, fmt_dec);
+WIRELESS_SHOW(retries, discard.retries, fmt_dec);
+WIRELESS_SHOW(beacon, miss.beacon, fmt_dec);
+
+static struct attribute *wireless_attrs[] = {
+	&dev_attr_status.attr,
+	&dev_attr_link.attr,
+	&dev_attr_level.attr,
+	&dev_attr_noise.attr,
+	&dev_attr_nwid.attr,
+	&dev_attr_crypt.attr,
+	&dev_attr_fragment.attr,
+	&dev_attr_retries.attr,
+	&dev_attr_misc.attr,
+	&dev_attr_beacon.attr,
+	NULL
+};
+
+static struct attribute_group wireless_group = {
+	.name = "wireless",
+	.attrs = wireless_attrs,
+};
+#endif
+
+#ifdef CONFIG_HOTPLUG
+static int netdev_uevent(struct device *d, char **envp,
+			 int num_envp, char *buf, int size)
+{
+	struct net_device *dev = to_net_dev(d);
+	int i = 0;
+	int n;
+
+	/* pass interface to uevent. */
+	envp[i++] = buf;
+	n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1;
+	buf += n;
+	size -= n;
+
+	if ((size <= 0) || (i >= num_envp))
+		return -ENOMEM;
+
+	envp[i] = NULL;
+	return 0;
+}
+#endif
+
+/*
+ *	netdev_release -- destroy and free a dead device.
+ *	Called when last reference to device kobject is gone.
+ */
+static void netdev_release(struct device *d)
+{
+	struct net_device *dev = to_net_dev(d);
+
+	BUG_ON(dev->reg_state != NETREG_RELEASED);
+
+	kfree((char *)dev - dev->padded);
+}
+
+static struct class net_class = {
+	.name = "net",
+	.dev_release = netdev_release,
+	.dev_attrs = net_class_attributes,
+#ifdef CONFIG_HOTPLUG
+	.dev_uevent = netdev_uevent,
+#endif
+};
+
+/* Delete sysfs entries but hold kobject reference until after all
+ * netdev references are gone.
+ */
+void netdev_unregister_sysfs(struct net_device * net)
+{
+	struct device *dev = &(net->dev);
+
+	kobject_get(&dev->kobj);
+	device_del(dev);
+}
+
+/* Create sysfs entries for network device. */
+int netdev_register_sysfs(struct net_device *net)
+{
+	struct device *dev = &(net->dev);
+	struct attribute_group **groups = net->sysfs_groups;
+
+	device_initialize(dev);
+	dev->class = &net_class;
+	dev->platform_data = net;
+	dev->groups = groups;
+
+	BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ);
+	strlcpy(dev->bus_id, net->name, BUS_ID_SIZE);
+
+	if (net->get_stats)
+		*groups++ = &netstat_group;
+
+#ifdef CONFIG_WIRELESS_EXT
+	if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)
+		*groups++ = &wireless_group;
+#endif
+
+	return device_add(dev);
+}
+
+int netdev_sysfs_init(void)
+{
+	return class_register(&net_class);
+}
--- a/net/core/netevent.c
+++ b/net/core/netevent.c
@@ -0,0 +1,69 @@
+/*
+ *	Network event notifiers
+ *
+ *	Authors:
+ *      Tom Tucker             <tom@opengridcomputing.com>
+ *      Steve Wise             <swise@opengridcomputing.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ */
+
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+
+static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain);
+
+/**
+ *	register_netevent_notifier - register a netevent notifier block
+ *	@nb: notifier
+ *
+ *	Register a notifier to be called when a netevent occurs.
+ *	The notifier passed is linked into the kernel structures and must
+ *	not be reused until it has been unregistered. A negative errno code
+ *	is returned on a failure.
+ */
+int register_netevent_notifier(struct notifier_block *nb)
+{
+	int err;
+
+	err = atomic_notifier_chain_register(&netevent_notif_chain, nb);
+	return err;
+}
+
+/**
+ *	netevent_unregister_notifier - unregister a netevent notifier block
+ *	@nb: notifier
+ *
+ *	Unregister a notifier previously registered by
+ *	register_neigh_notifier(). The notifier is unlinked into the
+ *	kernel structures and may then be reused. A negative errno code
+ *	is returned on a failure.
+ */
+
+int unregister_netevent_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&netevent_notif_chain, nb);
+}
+
+/**
+ *	call_netevent_notifiers - call all netevent notifier blocks
+ *      @val: value passed unmodified to notifier function
+ *      @v:   pointer passed unmodified to notifier function
+ *
+ *	Call all neighbour notifier blocks.  Parameters and return value
+ *	are as for notifier_call_chain().
+ */
+
+int call_netevent_notifiers(unsigned long val, void *v)
+{
+	return atomic_notifier_call_chain(&netevent_notif_chain, val, v);
+}
+
+EXPORT_SYMBOL_GPL(register_netevent_notifier);
+EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
+EXPORT_SYMBOL_GPL(call_netevent_notifiers);
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -0,0 +1,815 @@
+/*
+ * Common framework for low-level network console, dump, and debugger code
+ *
+ * Sep 8 2003  Matt Mackall <mpm@selenic.com>
+ *
+ * based on the netconsole code from:
+ *
+ * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2002  Red Hat, Inc.
+ */
+
+#include <linux/smp_lock.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/string.h>
+#include <linux/if_arp.h>
+#include <linux/inetdevice.h>
+#include <linux/inet.h>
+#include <linux/interrupt.h>
+#include <linux/netpoll.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <asm/unaligned.h>
+
+/*
+ * We maintain a small pool of fully-sized skbs, to make sure the
+ * message gets out even in extreme OOM situations.
+ */
+
+#define MAX_UDP_CHUNK 1460
+#define MAX_SKBS 32
+#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
+
+static struct sk_buff_head skb_pool;
+
+static atomic_t trapped;
+
+#define USEC_PER_POLL	50
+#define NETPOLL_RX_ENABLED  1
+#define NETPOLL_RX_DROP     2
+
+#define MAX_SKB_SIZE \
+		(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
+				sizeof(struct iphdr) + sizeof(struct ethhdr))
+
+static void zap_completion_queue(void);
+static void arp_reply(struct sk_buff *skb);
+
+static void queue_process(struct work_struct *work)
+{
+	struct netpoll_info *npinfo =
+		container_of(work, struct netpoll_info, tx_work.work);
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	while ((skb = skb_dequeue(&npinfo->txq))) {
+		struct net_device *dev = skb->dev;
+
+		if (!netif_device_present(dev) || !netif_running(dev)) {
+			__kfree_skb(skb);
+			continue;
+		}
+
+		local_irq_save(flags);
+		netif_tx_lock(dev);
+		if (netif_queue_stopped(dev) ||
+		    dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
+			skb_queue_head(&npinfo->txq, skb);
+			netif_tx_unlock(dev);
+			local_irq_restore(flags);
+
+			schedule_delayed_work(&npinfo->tx_work, HZ/10);
+			return;
+		}
+		netif_tx_unlock(dev);
+		local_irq_restore(flags);
+	}
+}
+
+static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
+			    unsigned short ulen, __be32 saddr, __be32 daddr)
+{
+	__wsum psum;
+
+	if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY)
+		return 0;
+
+	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE &&
+	    !csum_fold(csum_add(psum, skb->csum)))
+		return 0;
+
+	skb->csum = psum;
+
+	return __skb_checksum_complete(skb);
+}
+
+/*
+ * Check whether delayed processing was scheduled for our NIC. If so,
+ * we attempt to grab the poll lock and use ->poll() to pump the card.
+ * If this fails, either we've recursed in ->poll() or it's already
+ * running on another CPU.
+ *
+ * Note: we don't mask interrupts with this lock because we're using
+ * trylock here and interrupts are already disabled in the softirq
+ * case. Further, we test the poll_owner to avoid recursion on UP
+ * systems where the lock doesn't exist.
+ *
+ * In cases where there is bi-directional communications, reading only
+ * one message at a time can lead to packets being dropped by the
+ * network adapter, forcing superfluous retries and possibly timeouts.
+ * Thus, we set our budget to greater than 1.
+ */
+static void poll_napi(struct netpoll *np)
+{
+	struct netpoll_info *npinfo = np->dev->npinfo;
+	int budget = 16;
+
+	if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
+	    npinfo->poll_owner != smp_processor_id() &&
+	    spin_trylock(&npinfo->poll_lock)) {
+		npinfo->rx_flags |= NETPOLL_RX_DROP;
+		atomic_inc(&trapped);
+
+		np->dev->poll(np->dev, &budget);
+
+		atomic_dec(&trapped);
+		npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+		spin_unlock(&npinfo->poll_lock);
+	}
+}
+
+static void service_arp_queue(struct netpoll_info *npi)
+{
+	struct sk_buff *skb;
+
+	if (unlikely(!npi))
+		return;
+
+	skb = skb_dequeue(&npi->arp_tx);
+
+	while (skb != NULL) {
+		arp_reply(skb);
+		skb = skb_dequeue(&npi->arp_tx);
+	}
+}
+
+void netpoll_poll(struct netpoll *np)
+{
+	if (!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
+		return;
+
+	/* Process pending work on NIC */
+	np->dev->poll_controller(np->dev);
+	if (np->dev->poll)
+		poll_napi(np);
+
+	service_arp_queue(np->dev->npinfo);
+
+	zap_completion_queue();
+}
+
+static void refill_skbs(void)
+{
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	spin_lock_irqsave(&skb_pool.lock, flags);
+	while (skb_pool.qlen < MAX_SKBS) {
+		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
+		if (!skb)
+			break;
+
+		__skb_queue_tail(&skb_pool, skb);
+	}
+	spin_unlock_irqrestore(&skb_pool.lock, flags);
+}
+
+static void zap_completion_queue(void)
+{
+	unsigned long flags;
+	struct softnet_data *sd = &get_cpu_var(softnet_data);
+
+	if (sd->completion_queue) {
+		struct sk_buff *clist;
+
+		local_irq_save(flags);
+		clist = sd->completion_queue;
+		sd->completion_queue = NULL;
+		local_irq_restore(flags);
+
+		while (clist != NULL) {
+			struct sk_buff *skb = clist;
+			clist = clist->next;
+			if (skb->destructor)
+				dev_kfree_skb_any(skb); /* put this one back */
+			else
+				__kfree_skb(skb);
+		}
+	}
+
+	put_cpu_var(softnet_data);
+}
+
+static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
+{
+	int count = 0;
+	struct sk_buff *skb;
+
+	zap_completion_queue();
+	refill_skbs();
+repeat:
+
+	skb = alloc_skb(len, GFP_ATOMIC);
+	if (!skb)
+		skb = skb_dequeue(&skb_pool);
+
+	if (!skb) {
+		if (++count < 10) {
+			netpoll_poll(np);
+			goto repeat;
+		}
+		return NULL;
+	}
+
+	atomic_set(&skb->users, 1);
+	skb_reserve(skb, reserve);
+	return skb;
+}
+
+static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+{
+	int status = NETDEV_TX_BUSY;
+	unsigned long tries;
+	struct net_device *dev = np->dev;
+	struct netpoll_info *npinfo = np->dev->npinfo;
+
+	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
+		__kfree_skb(skb);
+		return;
+	}
+
+	/* don't get messages out of order, and no recursion */
+	if (skb_queue_len(&npinfo->txq) == 0 &&
+		    npinfo->poll_owner != smp_processor_id()) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		if (netif_tx_trylock(dev)) {
+			/* try until next clock tick */
+			for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
+					tries > 0; --tries) {
+				if (!netif_queue_stopped(dev))
+					status = dev->hard_start_xmit(skb, dev);
+
+				if (status == NETDEV_TX_OK)
+					break;
+
+				/* tickle device maybe there is some cleanup */
+				netpoll_poll(np);
+
+				udelay(USEC_PER_POLL);
+			}
+			netif_tx_unlock(dev);
+		}
+		local_irq_restore(flags);
+	}
+
+	if (status != NETDEV_TX_OK) {
+		skb_queue_tail(&npinfo->txq, skb);
+		schedule_delayed_work(&npinfo->tx_work,0);
+	}
+}
+
+void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
+{
+	int total_len, eth_len, ip_len, udp_len;
+	struct sk_buff *skb;
+	struct udphdr *udph;
+	struct iphdr *iph;
+	struct ethhdr *eth;
+
+	udp_len = len + sizeof(*udph);
+	ip_len = eth_len = udp_len + sizeof(*iph);
+	total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
+
+	skb = find_skb(np, total_len, total_len - len);
+	if (!skb)
+		return;
+
+	memcpy(skb->data, msg, len);
+	skb->len += len;
+
+	skb->h.uh = udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
+	udph->source = htons(np->local_port);
+	udph->dest = htons(np->remote_port);
+	udph->len = htons(udp_len);
+	udph->check = 0;
+	udph->check = csum_tcpudp_magic(htonl(np->local_ip),
+					htonl(np->remote_ip),
+					udp_len, IPPROTO_UDP,
+					csum_partial((unsigned char *)udph, udp_len, 0));
+	if (udph->check == 0)
+		udph->check = CSUM_MANGLED_0;
+
+	skb->nh.iph = iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
+
+	/* iph->version = 4; iph->ihl = 5; */
+	put_unaligned(0x45, (unsigned char *)iph);
+	iph->tos      = 0;
+	put_unaligned(htons(ip_len), &(iph->tot_len));
+	iph->id       = 0;
+	iph->frag_off = 0;
+	iph->ttl      = 64;
+	iph->protocol = IPPROTO_UDP;
+	iph->check    = 0;
+	put_unaligned(htonl(np->local_ip), &(iph->saddr));
+	put_unaligned(htonl(np->remote_ip), &(iph->daddr));
+	iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
+
+	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+	skb->mac.raw = skb->data;
+	skb->protocol = eth->h_proto = htons(ETH_P_IP);
+	memcpy(eth->h_source, np->local_mac, 6);
+	memcpy(eth->h_dest, np->remote_mac, 6);
+
+	skb->dev = np->dev;
+
+	netpoll_send_skb(np, skb);
+}
+
+static void arp_reply(struct sk_buff *skb)
+{
+	struct netpoll_info *npinfo = skb->dev->npinfo;
+	struct arphdr *arp;
+	unsigned char *arp_ptr;
+	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
+	__be32 sip, tip;
+	unsigned char *sha;
+	struct sk_buff *send_skb;
+	struct netpoll *np = NULL;
+
+	if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
+		np = npinfo->rx_np;
+	if (!np)
+		return;
+
+	/* No arp on this interface */
+	if (skb->dev->flags & IFF_NOARP)
+		return;
+
+	if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
+				 (2 * skb->dev->addr_len) +
+				 (2 * sizeof(u32)))))
+		return;
+
+	skb->h.raw = skb->nh.raw = skb->data;
+	arp = skb->nh.arph;
+
+	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
+	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
+	    arp->ar_pro != htons(ETH_P_IP) ||
+	    arp->ar_op != htons(ARPOP_REQUEST))
+		return;
+
+	arp_ptr = (unsigned char *)(arp+1);
+	/* save the location of the src hw addr */
+	sha = arp_ptr;
+	arp_ptr += skb->dev->addr_len;
+	memcpy(&sip, arp_ptr, 4);
+	arp_ptr += 4;
+	/* if we actually cared about dst hw addr, it would get copied here */
+	arp_ptr += skb->dev->addr_len;
+	memcpy(&tip, arp_ptr, 4);
+
+	/* Should we ignore arp? */
+	if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip))
+		return;
+
+	size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
+	send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
+			    LL_RESERVED_SPACE(np->dev));
+
+	if (!send_skb)
+		return;
+
+	send_skb->nh.raw = send_skb->data;
+	arp = (struct arphdr *) skb_put(send_skb, size);
+	send_skb->dev = skb->dev;
+	send_skb->protocol = htons(ETH_P_ARP);
+
+	/* Fill the device header for the ARP frame */
+
+	if (np->dev->hard_header &&
+	    np->dev->hard_header(send_skb, skb->dev, ptype,
+				 sha, np->local_mac,
+				 send_skb->len) < 0) {
+		kfree_skb(send_skb);
+		return;
+	}
+
+	/*
+	 * Fill out the arp protocol part.
+	 *
+	 * we only support ethernet device type,
+	 * which (according to RFC 1390) should always equal 1 (Ethernet).
+	 */
+
+	arp->ar_hrd = htons(np->dev->type);
+	arp->ar_pro = htons(ETH_P_IP);
+	arp->ar_hln = np->dev->addr_len;
+	arp->ar_pln = 4;
+	arp->ar_op = htons(type);
+
+	arp_ptr=(unsigned char *)(arp + 1);
+	memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
+	arp_ptr += np->dev->addr_len;
+	memcpy(arp_ptr, &tip, 4);
+	arp_ptr += 4;
+	memcpy(arp_ptr, sha, np->dev->addr_len);
+	arp_ptr += np->dev->addr_len;
+	memcpy(arp_ptr, &sip, 4);
+
+	netpoll_send_skb(np, send_skb);
+}
+
+int __netpoll_rx(struct sk_buff *skb)
+{
+	int proto, len, ulen;
+	struct iphdr *iph;
+	struct udphdr *uh;
+	struct netpoll_info *npi = skb->dev->npinfo;
+	struct netpoll *np = npi->rx_np;
+
+	if (!np)
+		goto out;
+	if (skb->dev->type != ARPHRD_ETHER)
+		goto out;
+
+	/* check if netpoll clients need ARP */
+	if (skb->protocol == __constant_htons(ETH_P_ARP) &&
+	    atomic_read(&trapped)) {
+		skb_queue_tail(&npi->arp_tx, skb);
+		return 1;
+	}
+
+	proto = ntohs(eth_hdr(skb)->h_proto);
+	if (proto != ETH_P_IP)
+		goto out;
+	if (skb->pkt_type == PACKET_OTHERHOST)
+		goto out;
+	if (skb_shared(skb))
+		goto out;
+
+	iph = (struct iphdr *)skb->data;
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto out;
+	if (iph->ihl < 5 || iph->version != 4)
+		goto out;
+	if (!pskb_may_pull(skb, iph->ihl*4))
+		goto out;
+	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
+		goto out;
+
+	len = ntohs(iph->tot_len);
+	if (skb->len < len || len < iph->ihl*4)
+		goto out;
+
+	/*
+	 * Our transport medium may have padded the buffer out.
+	 * Now We trim to the true length of the frame.
+	 */
+	if (pskb_trim_rcsum(skb, len))
+		goto out;
+
+	if (iph->protocol != IPPROTO_UDP)
+		goto out;
+
+	len -= iph->ihl*4;
+	uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
+	ulen = ntohs(uh->len);
+
+	if (ulen != len)
+		goto out;
+	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
+		goto out;
+	if (np->local_ip && np->local_ip != ntohl(iph->daddr))
+		goto out;
+	if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
+		goto out;
+	if (np->local_port && np->local_port != ntohs(uh->dest))
+		goto out;
+
+	np->rx_hook(np, ntohs(uh->source),
+		    (char *)(uh+1),
+		    ulen - sizeof(struct udphdr));
+
+	kfree_skb(skb);
+	return 1;
+
+out:
+	if (atomic_read(&trapped)) {
+		kfree_skb(skb);
+		return 1;
+	}
+
+	return 0;
+}
+
+int netpoll_parse_options(struct netpoll *np, char *opt)
+{
+	char *cur=opt, *delim;
+
+	if (*cur != '@') {
+		if ((delim = strchr(cur, '@')) == NULL)
+			goto parse_failed;
+		*delim = 0;
+		np->local_port = simple_strtol(cur, NULL, 10);
+		cur = delim;
+	}
+	cur++;
+	printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
+
+	if (*cur != '/') {
+		if ((delim = strchr(cur, '/')) == NULL)
+			goto parse_failed;
+		*delim = 0;
+		np->local_ip = ntohl(in_aton(cur));
+		cur = delim;
+
+		printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
+		       np->name, HIPQUAD(np->local_ip));
+	}
+	cur++;
+
+	if (*cur != ',') {
+		/* parse out dev name */
+		if ((delim = strchr(cur, ',')) == NULL)
+			goto parse_failed;
+		*delim = 0;
+		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
+		cur = delim;
+	}
+	cur++;
+
+	printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
+
+	if (*cur != '@') {
+		/* dst port */
+		if ((delim = strchr(cur, '@')) == NULL)
+			goto parse_failed;
+		*delim = 0;
+		np->remote_port = simple_strtol(cur, NULL, 10);
+		cur = delim;
+	}
+	cur++;
+	printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
+
+	/* dst ip */
+	if ((delim = strchr(cur, '/')) == NULL)
+		goto parse_failed;
+	*delim = 0;
+	np->remote_ip = ntohl(in_aton(cur));
+	cur = delim + 1;
+
+	printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
+	       np->name, HIPQUAD(np->remote_ip));
+
+	if (*cur != 0) {
+		/* MAC address */
+		if ((delim = strchr(cur, ':')) == NULL)
+			goto parse_failed;
+		*delim = 0;
+		np->remote_mac[0] = simple_strtol(cur, NULL, 16);
+		cur = delim + 1;
+		if ((delim = strchr(cur, ':')) == NULL)
+			goto parse_failed;
+		*delim = 0;
+		np->remote_mac[1] = simple_strtol(cur, NULL, 16);
+		cur = delim + 1;
+		if ((delim = strchr(cur, ':')) == NULL)
+			goto parse_failed;
+		*delim = 0;
+		np->remote_mac[2] = simple_strtol(cur, NULL, 16);
+		cur = delim + 1;
+		if ((delim = strchr(cur, ':')) == NULL)
+			goto parse_failed;
+		*delim = 0;
+		np->remote_mac[3] = simple_strtol(cur, NULL, 16);
+		cur = delim + 1;
+		if ((delim = strchr(cur, ':')) == NULL)
+			goto parse_failed;
+		*delim = 0;
+		np->remote_mac[4] = simple_strtol(cur, NULL, 16);
+		cur = delim + 1;
+		np->remote_mac[5] = simple_strtol(cur, NULL, 16);
+	}
+
+	printk(KERN_INFO "%s: remote ethernet address "
+	       "%02x:%02x:%02x:%02x:%02x:%02x\n",
+	       np->name,
+	       np->remote_mac[0],
+	       np->remote_mac[1],
+	       np->remote_mac[2],
+	       np->remote_mac[3],
+	       np->remote_mac[4],
+	       np->remote_mac[5]);
+
+	return 0;
+
+ parse_failed:
+	printk(KERN_INFO "%s: couldn't parse config at %s!\n",
+	       np->name, cur);
+	return -1;
+}
+
+int netpoll_setup(struct netpoll *np)
+{
+	struct net_device *ndev = NULL;
+	struct in_device *in_dev;
+	struct netpoll_info *npinfo;
+	unsigned long flags;
+	int err;
+
+	if (np->dev_name)
+		ndev = dev_get_by_name(np->dev_name);
+	if (!ndev) {
+		printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
+		       np->name, np->dev_name);
+		return -ENODEV;
+	}
+
+	np->dev = ndev;
+	if (!ndev->npinfo) {
+		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+		if (!npinfo) {
+			err = -ENOMEM;
+			goto release;
+		}
+
+		npinfo->rx_flags = 0;
+		npinfo->rx_np = NULL;
+		spin_lock_init(&npinfo->poll_lock);
+		npinfo->poll_owner = -1;
+
+		spin_lock_init(&npinfo->rx_lock);
+		skb_queue_head_init(&npinfo->arp_tx);
+		skb_queue_head_init(&npinfo->txq);
+		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
+
+		atomic_set(&npinfo->refcnt, 1);
+	} else {
+		npinfo = ndev->npinfo;
+		atomic_inc(&npinfo->refcnt);
+	}
+
+	if (!ndev->poll_controller) {
+		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
+		       np->name, np->dev_name);
+		err = -ENOTSUPP;
+		goto release;
+	}
+
+	if (!netif_running(ndev)) {
+		unsigned long atmost, atleast;
+
+		printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
+		       np->name, np->dev_name);
+
+		rtnl_lock();
+		err = dev_open(ndev);
+		rtnl_unlock();
+
+		if (err) {
+			printk(KERN_ERR "%s: failed to open %s\n",
+			       np->name, ndev->name);
+			goto release;
+		}
+
+		atleast = jiffies + HZ/10;
+		atmost = jiffies + 4*HZ;
+		while (!netif_carrier_ok(ndev)) {
+			if (time_after(jiffies, atmost)) {
+				printk(KERN_NOTICE
+				       "%s: timeout waiting for carrier\n",
+				       np->name);
+				break;
+			}
+			cond_resched();
+		}
+
+		/* If carrier appears to come up instantly, we don't
+		 * trust it and pause so that we don't pump all our
+		 * queued console messages into the bitbucket.
+		 */
+
+		if (time_before(jiffies, atleast)) {
+			printk(KERN_NOTICE "%s: carrier detect appears"
+			       " untrustworthy, waiting 4 seconds\n",
+			       np->name);
+			msleep(4000);
+		}
+	}
+
+	if (is_zero_ether_addr(np->local_mac) && ndev->dev_addr)
+		memcpy(np->local_mac, ndev->dev_addr, 6);
+
+	if (!np->local_ip) {
+		rcu_read_lock();
+		in_dev = __in_dev_get_rcu(ndev);
+
+		if (!in_dev || !in_dev->ifa_list) {
+			rcu_read_unlock();
+			printk(KERN_ERR "%s: no IP address for %s, aborting\n",
+			       np->name, np->dev_name);
+			err = -EDESTADDRREQ;
+			goto release;
+		}
+
+		np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
+		rcu_read_unlock();
+		printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
+		       np->name, HIPQUAD(np->local_ip));
+	}
+
+	if (np->rx_hook) {
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
+		npinfo->rx_np = np;
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	}
+
+	/* fill up the skb queue */
+	refill_skbs();
+
+	/* last thing to do is link it to the net device structure */
+	ndev->npinfo = npinfo;
+
+	/* avoid racing with NAPI reading npinfo */
+	synchronize_rcu();
+
+	return 0;
+
+ release:
+	if (!ndev->npinfo)
+		kfree(npinfo);
+	np->dev = NULL;
+	dev_put(ndev);
+	return err;
+}
+
+static int __init netpoll_init(void)
+{
+	skb_queue_head_init(&skb_pool);
+	return 0;
+}
+core_initcall(netpoll_init);
+
+void netpoll_cleanup(struct netpoll *np)
+{
+	struct netpoll_info *npinfo;
+	unsigned long flags;
+
+	if (np->dev) {
+		npinfo = np->dev->npinfo;
+		if (npinfo) {
+			if (npinfo->rx_np == np) {
+				spin_lock_irqsave(&npinfo->rx_lock, flags);
+				npinfo->rx_np = NULL;
+				npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+				spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+			}
+
+			np->dev->npinfo = NULL;
+			if (atomic_dec_and_test(&npinfo->refcnt)) {
+				skb_queue_purge(&npinfo->arp_tx);
+				skb_queue_purge(&npinfo->txq);
+				cancel_rearming_delayed_work(&npinfo->tx_work);
+				flush_scheduled_work();
+
+				kfree(npinfo);
+			}
+		}
+
+		dev_put(np->dev);
+	}
+
+	np->dev = NULL;
+}
+
+int netpoll_trap(void)
+{
+	return atomic_read(&trapped);
+}
+
+void netpoll_set_trap(int trap)
+{
+	if (trap)
+		atomic_inc(&trapped);
+	else
+		atomic_dec(&trapped);
+}
+
+EXPORT_SYMBOL(netpoll_set_trap);
+EXPORT_SYMBOL(netpoll_trap);
+EXPORT_SYMBOL(netpoll_parse_options);
+EXPORT_SYMBOL(netpoll_setup);
+EXPORT_SYMBOL(netpoll_cleanup);
+EXPORT_SYMBOL(netpoll_send_udp);
+EXPORT_SYMBOL(netpoll_poll);
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -0,0 +1,102 @@
+/*
+ * NET		Generic infrastructure for Network protocols.
+ *
+ * Authors:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * 		From code originally in include/net/tcp.h
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+
+#include <net/request_sock.h>
+
+/*
+ * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
+ * One SYN_RECV socket costs about 80bytes on a 32bit machine.
+ * It would be better to replace it with a global counter for all sockets
+ * but then some measure against one socket starving all other sockets
+ * would be needed.
+ *
+ * It was 128 by default. Experiments with real servers show, that
+ * it is absolutely not enough even at 100conn/sec. 256 cures most
+ * of problems. This value is adjusted to 128 for very small machines
+ * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
+ * Note : Dont forget somaxconn that may limit backlog too.
+ */
+int sysctl_max_syn_backlog = 256;
+
+int reqsk_queue_alloc(struct request_sock_queue *queue,
+		      unsigned int nr_table_entries)
+{
+	size_t lopt_size = sizeof(struct listen_sock);
+	struct listen_sock *lopt;
+
+	nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
+	nr_table_entries = max_t(u32, nr_table_entries, 8);
+	nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
+	lopt_size += nr_table_entries * sizeof(struct request_sock *);
+	if (lopt_size > PAGE_SIZE)
+		lopt = __vmalloc(lopt_size,
+			GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+			PAGE_KERNEL);
+	else
+		lopt = kzalloc(lopt_size, GFP_KERNEL);
+	if (lopt == NULL)
+		return -ENOMEM;
+
+	for (lopt->max_qlen_log = 3;
+	     (1 << lopt->max_qlen_log) < nr_table_entries;
+	     lopt->max_qlen_log++);
+
+	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
+	rwlock_init(&queue->syn_wait_lock);
+	queue->rskq_accept_head = NULL;
+	lopt->nr_table_entries = nr_table_entries;
+
+	write_lock_bh(&queue->syn_wait_lock);
+	queue->listen_opt = lopt;
+	write_unlock_bh(&queue->syn_wait_lock);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(reqsk_queue_alloc);
+
+void reqsk_queue_destroy(struct request_sock_queue *queue)
+{
+	/* make all the listen_opt local to us */
+	struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
+	size_t lopt_size = sizeof(struct listen_sock) +
+		lopt->nr_table_entries * sizeof(struct request_sock *);
+
+	if (lopt->qlen != 0) {
+		unsigned int i;
+
+		for (i = 0; i < lopt->nr_table_entries; i++) {
+			struct request_sock *req;
+
+			while ((req = lopt->syn_table[i]) != NULL) {
+				lopt->syn_table[i] = req->dl_next;
+				lopt->qlen--;
+				reqsk_free(req);
+			}
+		}
+	}
+
+	BUG_TRAP(lopt->qlen == 0);
+	if (lopt_size > PAGE_SIZE)
+		vfree(lopt);
+	else
+		kfree(lopt);
+}
+
+EXPORT_SYMBOL(reqsk_queue_destroy);
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -0,0 +1,896 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Routing netlink socket interface: protocol independent part.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ *	Vitaly E. Lavrov		RTA_OK arithmetics was wrong.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/capability.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/security.h>
+#include <linux/mutex.h>
+#include <linux/if_addr.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/string.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/arp.h>
+#include <net/route.h>
+#include <net/udp.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <net/fib_rules.h>
+#include <net/netlink.h>
+#ifdef CONFIG_NET_WIRELESS_RTNETLINK
+#include <linux/wireless.h>
+#include <net/iw_handler.h>
+#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
+
+static DEFINE_MUTEX(rtnl_mutex);
+static struct sock *rtnl;
+
+void rtnl_lock(void)
+{
+	mutex_lock(&rtnl_mutex);
+}
+
+void __rtnl_unlock(void)
+{
+	mutex_unlock(&rtnl_mutex);
+}
+
+void rtnl_unlock(void)
+{
+	mutex_unlock(&rtnl_mutex);
+	if (rtnl && rtnl->sk_receive_queue.qlen)
+		rtnl->sk_data_ready(rtnl, 0);
+	netdev_run_todo();
+}
+
+int rtnl_trylock(void)
+{
+	return mutex_trylock(&rtnl_mutex);
+}
+
+int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
+{
+	memset(tb, 0, sizeof(struct rtattr*)*maxattr);
+
+	while (RTA_OK(rta, len)) {
+		unsigned flavor = rta->rta_type;
+		if (flavor && flavor <= maxattr)
+			tb[flavor-1] = rta;
+		rta = RTA_NEXT(rta, len);
+	}
+	return 0;
+}
+
+struct rtnetlink_link * rtnetlink_links[NPROTO];
+
+static const int rtm_min[RTM_NR_FAMILIES] =
+{
+	[RTM_FAM(RTM_NEWLINK)]      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+	[RTM_FAM(RTM_NEWADDR)]      = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
+	[RTM_FAM(RTM_NEWROUTE)]     = NLMSG_LENGTH(sizeof(struct rtmsg)),
+	[RTM_FAM(RTM_NEWRULE)]      = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)),
+	[RTM_FAM(RTM_NEWQDISC)]     = NLMSG_LENGTH(sizeof(struct tcmsg)),
+	[RTM_FAM(RTM_NEWTCLASS)]    = NLMSG_LENGTH(sizeof(struct tcmsg)),
+	[RTM_FAM(RTM_NEWTFILTER)]   = NLMSG_LENGTH(sizeof(struct tcmsg)),
+	[RTM_FAM(RTM_NEWACTION)]    = NLMSG_LENGTH(sizeof(struct tcamsg)),
+	[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
+	[RTM_FAM(RTM_GETANYCAST)]   = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
+};
+
+static const int rta_max[RTM_NR_FAMILIES] =
+{
+	[RTM_FAM(RTM_NEWLINK)]      = IFLA_MAX,
+	[RTM_FAM(RTM_NEWADDR)]      = IFA_MAX,
+	[RTM_FAM(RTM_NEWROUTE)]     = RTA_MAX,
+	[RTM_FAM(RTM_NEWRULE)]      = FRA_MAX,
+	[RTM_FAM(RTM_NEWQDISC)]     = TCA_MAX,
+	[RTM_FAM(RTM_NEWTCLASS)]    = TCA_MAX,
+	[RTM_FAM(RTM_NEWTFILTER)]   = TCA_MAX,
+	[RTM_FAM(RTM_NEWACTION)]    = TCAA_MAX,
+};
+
+void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
+{
+	struct rtattr *rta;
+	int size = RTA_LENGTH(attrlen);
+
+	rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size));
+	rta->rta_type = attrtype;
+	rta->rta_len = size;
+	memcpy(RTA_DATA(rta), data, attrlen);
+	memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size);
+}
+
+size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size)
+{
+	size_t ret = RTA_PAYLOAD(rta);
+	char *src = RTA_DATA(rta);
+
+	if (ret > 0 && src[ret - 1] == '\0')
+		ret--;
+	if (size > 0) {
+		size_t len = (ret >= size) ? size - 1 : ret;
+		memset(dest, 0, size);
+		memcpy(dest, src, len);
+	}
+	return ret;
+}
+
+int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
+{
+	int err = 0;
+
+	NETLINK_CB(skb).dst_group = group;
+	if (echo)
+		atomic_inc(&skb->users);
+	netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
+	if (echo)
+		err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+	return err;
+}
+
+int rtnl_unicast(struct sk_buff *skb, u32 pid)
+{
+	return nlmsg_unicast(rtnl, skb, pid);
+}
+
+int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
+		struct nlmsghdr *nlh, gfp_t flags)
+{
+	int report = 0;
+
+	if (nlh)
+		report = nlmsg_report(nlh);
+
+	return nlmsg_notify(rtnl, skb, pid, group, report, flags);
+}
+
+void rtnl_set_sk_err(u32 group, int error)
+{
+	netlink_set_err(rtnl, 0, group, error);
+}
+
+int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
+{
+	struct nlattr *mx;
+	int i, valid = 0;
+
+	mx = nla_nest_start(skb, RTA_METRICS);
+	if (mx == NULL)
+		return -ENOBUFS;
+
+	for (i = 0; i < RTAX_MAX; i++) {
+		if (metrics[i]) {
+			valid++;
+			NLA_PUT_U32(skb, i+1, metrics[i]);
+		}
+	}
+
+	if (!valid) {
+		nla_nest_cancel(skb, mx);
+		return 0;
+	}
+
+	return nla_nest_end(skb, mx);
+
+nla_put_failure:
+	return nla_nest_cancel(skb, mx);
+}
+
+int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
+		       u32 ts, u32 tsage, long expires, u32 error)
+{
+	struct rta_cacheinfo ci = {
+		.rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse),
+		.rta_used = dst->__use,
+		.rta_clntref = atomic_read(&(dst->__refcnt)),
+		.rta_error = error,
+		.rta_id =  id,
+		.rta_ts = ts,
+		.rta_tsage = tsage,
+	};
+
+	if (expires)
+		ci.rta_expires = jiffies_to_clock_t(expires);
+
+	return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+}
+
+EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
+
+static void set_operstate(struct net_device *dev, unsigned char transition)
+{
+	unsigned char operstate = dev->operstate;
+
+	switch(transition) {
+	case IF_OPER_UP:
+		if ((operstate == IF_OPER_DORMANT ||
+		     operstate == IF_OPER_UNKNOWN) &&
+		    !netif_dormant(dev))
+			operstate = IF_OPER_UP;
+		break;
+
+	case IF_OPER_DORMANT:
+		if (operstate == IF_OPER_UP ||
+		    operstate == IF_OPER_UNKNOWN)
+			operstate = IF_OPER_DORMANT;
+		break;
+	};
+
+	if (dev->operstate != operstate) {
+		write_lock_bh(&dev_base_lock);
+		dev->operstate = operstate;
+		write_unlock_bh(&dev_base_lock);
+		netdev_state_change(dev);
+	}
+}
+
+static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
+				 struct net_device_stats *b)
+{
+	a->rx_packets = b->rx_packets;
+	a->tx_packets = b->tx_packets;
+	a->rx_bytes = b->rx_bytes;
+	a->tx_bytes = b->tx_bytes;
+	a->rx_errors = b->rx_errors;
+	a->tx_errors = b->tx_errors;
+	a->rx_dropped = b->rx_dropped;
+	a->tx_dropped = b->tx_dropped;
+
+	a->multicast = b->multicast;
+	a->collisions = b->collisions;
+
+	a->rx_length_errors = b->rx_length_errors;
+	a->rx_over_errors = b->rx_over_errors;
+	a->rx_crc_errors = b->rx_crc_errors;
+	a->rx_frame_errors = b->rx_frame_errors;
+	a->rx_fifo_errors = b->rx_fifo_errors;
+	a->rx_missed_errors = b->rx_missed_errors;
+
+	a->tx_aborted_errors = b->tx_aborted_errors;
+	a->tx_carrier_errors = b->tx_carrier_errors;
+	a->tx_fifo_errors = b->tx_fifo_errors;
+	a->tx_heartbeat_errors = b->tx_heartbeat_errors;
+	a->tx_window_errors = b->tx_window_errors;
+
+	a->rx_compressed = b->rx_compressed;
+	a->tx_compressed = b->tx_compressed;
+};
+
+static inline size_t if_nlmsg_size(int iwbuflen)
+{
+	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+	       + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
+	       + nla_total_size(sizeof(struct rtnl_link_ifmap))
+	       + nla_total_size(sizeof(struct rtnl_link_stats))
+	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
+	       + nla_total_size(4) /* IFLA_TXQLEN */
+	       + nla_total_size(4) /* IFLA_WEIGHT */
+	       + nla_total_size(4) /* IFLA_MTU */
+	       + nla_total_size(4) /* IFLA_LINK */
+	       + nla_total_size(4) /* IFLA_MASTER */
+	       + nla_total_size(1) /* IFLA_OPERSTATE */
+	       + nla_total_size(1) /* IFLA_LINKMODE */
+	       + nla_total_size(iwbuflen);
+}
+
+static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+			    void *iwbuf, int iwbuflen, int type, u32 pid,
+			    u32 seq, u32 change, unsigned int flags)
+{
+	struct ifinfomsg *ifm;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	ifm = nlmsg_data(nlh);
+	ifm->ifi_family = AF_UNSPEC;
+	ifm->__ifi_pad = 0;
+	ifm->ifi_type = dev->type;
+	ifm->ifi_index = dev->ifindex;
+	ifm->ifi_flags = dev_get_flags(dev);
+	ifm->ifi_change = change;
+
+	NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
+	NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len);
+	NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight);
+	NLA_PUT_U8(skb, IFLA_OPERSTATE,
+		   netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
+	NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
+	NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+
+	if (dev->ifindex != dev->iflink)
+		NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
+
+	if (dev->master)
+		NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
+
+	if (dev->qdisc_sleeping)
+		NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id);
+
+	if (1) {
+		struct rtnl_link_ifmap map = {
+			.mem_start   = dev->mem_start,
+			.mem_end     = dev->mem_end,
+			.base_addr   = dev->base_addr,
+			.irq         = dev->irq,
+			.dma         = dev->dma,
+			.port        = dev->if_port,
+		};
+		NLA_PUT(skb, IFLA_MAP, sizeof(map), &map);
+	}
+
+	if (dev->addr_len) {
+		NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+		NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
+	}
+
+	if (dev->get_stats) {
+		struct net_device_stats *stats = dev->get_stats(dev);
+		if (stats) {
+			struct nlattr *attr;
+
+			attr = nla_reserve(skb, IFLA_STATS,
+					   sizeof(struct rtnl_link_stats));
+			if (attr == NULL)
+				goto nla_put_failure;
+
+			copy_rtnl_link_stats(nla_data(attr), stats);
+		}
+	}
+
+	if (iwbuf)
+		NLA_PUT(skb, IFLA_WIRELESS, iwbuflen, iwbuf);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx;
+	int s_idx = cb->args[0];
+	struct net_device *dev;
+
+	read_lock(&dev_base_lock);
+	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
+		if (idx < s_idx)
+			continue;
+		if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK,
+				     NETLINK_CB(cb->skb).pid,
+				     cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
+			break;
+	}
+	read_unlock(&dev_base_lock);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = {
+	[IFLA_IFNAME]		= { .type = NLA_STRING, .len = IFNAMSIZ-1 },
+	[IFLA_MAP]		= { .len = sizeof(struct rtnl_link_ifmap) },
+	[IFLA_MTU]		= { .type = NLA_U32 },
+	[IFLA_TXQLEN]		= { .type = NLA_U32 },
+	[IFLA_WEIGHT]		= { .type = NLA_U32 },
+	[IFLA_OPERSTATE]	= { .type = NLA_U8 },
+	[IFLA_LINKMODE]		= { .type = NLA_U8 },
+};
+
+static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct ifinfomsg *ifm;
+	struct net_device *dev;
+	int err, send_addr_notify = 0, modified = 0;
+	struct nlattr *tb[IFLA_MAX+1];
+	char ifname[IFNAMSIZ];
+
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		goto errout;
+
+	if (tb[IFLA_IFNAME])
+		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+	else
+		ifname[0] = '\0';
+
+	err = -EINVAL;
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_index >= 0)
+		dev = dev_get_by_index(ifm->ifi_index);
+	else if (tb[IFLA_IFNAME])
+		dev = dev_get_by_name(ifname);
+	else
+		goto errout;
+
+	if (dev == NULL) {
+		err = -ENODEV;
+		goto errout;
+	}
+
+	if (tb[IFLA_ADDRESS] &&
+	    nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
+		goto errout_dev;
+
+	if (tb[IFLA_BROADCAST] &&
+	    nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
+		goto errout_dev;
+
+	if (tb[IFLA_MAP]) {
+		struct rtnl_link_ifmap *u_map;
+		struct ifmap k_map;
+
+		if (!dev->set_config) {
+			err = -EOPNOTSUPP;
+			goto errout_dev;
+		}
+
+		if (!netif_device_present(dev)) {
+			err = -ENODEV;
+			goto errout_dev;
+		}
+
+		u_map = nla_data(tb[IFLA_MAP]);
+		k_map.mem_start = (unsigned long) u_map->mem_start;
+		k_map.mem_end = (unsigned long) u_map->mem_end;
+		k_map.base_addr = (unsigned short) u_map->base_addr;
+		k_map.irq = (unsigned char) u_map->irq;
+		k_map.dma = (unsigned char) u_map->dma;
+		k_map.port = (unsigned char) u_map->port;
+
+		err = dev->set_config(dev, &k_map);
+		if (err < 0)
+			goto errout_dev;
+
+		modified = 1;
+	}
+
+	if (tb[IFLA_ADDRESS]) {
+		struct sockaddr *sa;
+		int len;
+
+		if (!dev->set_mac_address) {
+			err = -EOPNOTSUPP;
+			goto errout_dev;
+		}
+
+		if (!netif_device_present(dev)) {
+			err = -ENODEV;
+			goto errout_dev;
+		}
+
+		len = sizeof(sa_family_t) + dev->addr_len;
+		sa = kmalloc(len, GFP_KERNEL);
+		if (!sa) {
+			err = -ENOMEM;
+			goto errout_dev;
+		}
+		sa->sa_family = dev->type;
+		memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
+		       dev->addr_len);
+		err = dev->set_mac_address(dev, sa);
+		kfree(sa);
+		if (err)
+			goto errout_dev;
+		send_addr_notify = 1;
+		modified = 1;
+	}
+
+	if (tb[IFLA_MTU]) {
+		err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+		if (err < 0)
+			goto errout_dev;
+		modified = 1;
+	}
+
+	/*
+	 * Interface selected by interface index but interface
+	 * name provided implies that a name change has been
+	 * requested.
+	 */
+	if (ifm->ifi_index >= 0 && ifname[0]) {
+		err = dev_change_name(dev, ifname);
+		if (err < 0)
+			goto errout_dev;
+		modified = 1;
+	}
+
+#ifdef CONFIG_NET_WIRELESS_RTNETLINK
+	if (tb[IFLA_WIRELESS]) {
+		/* Call Wireless Extensions.
+		 * Various stuff checked in there... */
+		err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]),
+					     nla_len(tb[IFLA_WIRELESS]));
+		if (err < 0)
+			goto errout_dev;
+	}
+#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
+
+	if (tb[IFLA_BROADCAST]) {
+		nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
+		send_addr_notify = 1;
+	}
+
+
+	if (ifm->ifi_flags)
+		dev_change_flags(dev, ifm->ifi_flags);
+
+	if (tb[IFLA_TXQLEN])
+		dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
+
+	if (tb[IFLA_WEIGHT])
+		dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
+
+	if (tb[IFLA_OPERSTATE])
+		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
+
+	if (tb[IFLA_LINKMODE]) {
+		write_lock_bh(&dev_base_lock);
+		dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+		write_unlock_bh(&dev_base_lock);
+	}
+
+	err = 0;
+
+errout_dev:
+	if (err < 0 && modified && net_ratelimit())
+		printk(KERN_WARNING "A link change request failed with "
+		       "some changes comitted already. Interface %s may "
+		       "have been left with an inconsistent configuration, "
+		       "please check.\n", dev->name);
+
+	if (send_addr_notify)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+
+	dev_put(dev);
+errout:
+	return err;
+}
+
+static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct ifinfomsg *ifm;
+	struct nlattr *tb[IFLA_MAX+1];
+	struct net_device *dev = NULL;
+	struct sk_buff *nskb;
+	char *iw_buf = NULL, *iw = NULL;
+	int iw_buf_len = 0;
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		return err;
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_index >= 0) {
+		dev = dev_get_by_index(ifm->ifi_index);
+		if (dev == NULL)
+			return -ENODEV;
+	} else
+		return -EINVAL;
+
+
+#ifdef CONFIG_NET_WIRELESS_RTNETLINK
+	if (tb[IFLA_WIRELESS]) {
+		/* Call Wireless Extensions. We need to know the size before
+		 * we can alloc. Various stuff checked in there... */
+		err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]),
+					     nla_len(tb[IFLA_WIRELESS]),
+					     &iw_buf, &iw_buf_len);
+		if (err < 0)
+			goto errout;
+
+		/* Payload is at an offset in buffer */
+		iw = iw_buf + IW_EV_POINT_OFF;
+	}
+#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
+
+	nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL);
+	if (nskb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
+
+	err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK,
+			       NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in if_nlmsg_size */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(nskb);
+		goto errout;
+	}
+	err = rtnl_unicast(nskb, NETLINK_CB(skb).pid);
+errout:
+	kfree(iw_buf);
+	dev_put(dev);
+
+	return err;
+}
+
+static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx;
+	int s_idx = cb->family;
+
+	if (s_idx == 0)
+		s_idx = 1;
+	for (idx=1; idx<NPROTO; idx++) {
+		int type = cb->nlh->nlmsg_type-RTM_BASE;
+		if (idx < s_idx || idx == PF_PACKET)
+			continue;
+		if (rtnetlink_links[idx] == NULL ||
+		    rtnetlink_links[idx][type].dumpit == NULL)
+			continue;
+		if (idx > s_idx)
+			memset(&cb->args[0], 0, sizeof(cb->args));
+		if (rtnetlink_links[idx][type].dumpit(skb, cb))
+			break;
+	}
+	cb->family = idx;
+
+	return skb->len;
+}
+
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
+{
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(if_nlmsg_size(0), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in if_nlmsg_size() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_LINK, err);
+}
+
+/* Protected by RTNL sempahore.  */
+static struct rtattr **rta_buf;
+static int rtattr_max;
+
+/* Process one rtnetlink message. */
+
+static __inline__ int
+rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+{
+	struct rtnetlink_link *link;
+	struct rtnetlink_link *link_tab;
+	int sz_idx, kind;
+	int min_len;
+	int family;
+	int type;
+	int err;
+
+	/* Only requests are handled by kernel now */
+	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
+		return 0;
+
+	type = nlh->nlmsg_type;
+
+	/* A control message: ignore them */
+	if (type < RTM_BASE)
+		return 0;
+
+	/* Unknown message: reply with EINVAL */
+	if (type > RTM_MAX)
+		goto err_inval;
+
+	type -= RTM_BASE;
+
+	/* All the messages must have at least 1 byte length */
+	if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
+		return 0;
+
+	family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
+	if (family >= NPROTO) {
+		*errp = -EAFNOSUPPORT;
+		return -1;
+	}
+
+	link_tab = rtnetlink_links[family];
+	if (link_tab == NULL)
+		link_tab = rtnetlink_links[PF_UNSPEC];
+	link = &link_tab[type];
+
+	sz_idx = type>>2;
+	kind = type&3;
+
+	if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) {
+		*errp = -EPERM;
+		return -1;
+	}
+
+	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+		if (link->dumpit == NULL)
+			link = &(rtnetlink_links[PF_UNSPEC][type]);
+
+		if (link->dumpit == NULL)
+			goto err_inval;
+
+		if ((*errp = netlink_dump_start(rtnl, skb, nlh,
+						link->dumpit, NULL)) != 0) {
+			return -1;
+		}
+
+		netlink_queue_skip(nlh, skb);
+		return -1;
+	}
+
+	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
+
+	min_len = rtm_min[sz_idx];
+	if (nlh->nlmsg_len < min_len)
+		goto err_inval;
+
+	if (nlh->nlmsg_len > min_len) {
+		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
+		struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
+
+		while (RTA_OK(attr, attrlen)) {
+			unsigned flavor = attr->rta_type;
+			if (flavor) {
+				if (flavor > rta_max[sz_idx])
+					goto err_inval;
+				rta_buf[flavor-1] = attr;
+			}
+			attr = RTA_NEXT(attr, attrlen);
+		}
+	}
+
+	if (link->doit == NULL)
+		link = &(rtnetlink_links[PF_UNSPEC][type]);
+	if (link->doit == NULL)
+		goto err_inval;
+	err = link->doit(skb, nlh, (void *)&rta_buf[0]);
+
+	*errp = err;
+	return err;
+
+err_inval:
+	*errp = -EINVAL;
+	return -1;
+}
+
+static void rtnetlink_rcv(struct sock *sk, int len)
+{
+	unsigned int qlen = 0;
+
+	do {
+		mutex_lock(&rtnl_mutex);
+		netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg);
+		mutex_unlock(&rtnl_mutex);
+
+		netdev_run_todo();
+	} while (qlen);
+}
+
+static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
+{
+	[RTM_GETLINK     - RTM_BASE] = { .doit   = rtnl_getlink,
+					 .dumpit = rtnl_dump_ifinfo	 },
+	[RTM_SETLINK     - RTM_BASE] = { .doit   = rtnl_setlink		 },
+	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
+	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
+	[RTM_NEWNEIGH    - RTM_BASE] = { .doit   = neigh_add		 },
+	[RTM_DELNEIGH    - RTM_BASE] = { .doit   = neigh_delete		 },
+	[RTM_GETNEIGH    - RTM_BASE] = { .dumpit = neigh_dump_info	 },
+#ifdef CONFIG_FIB_RULES
+	[RTM_NEWRULE     - RTM_BASE] = { .doit   = fib_nl_newrule	 },
+	[RTM_DELRULE     - RTM_BASE] = { .doit   = fib_nl_delrule	 },
+#endif
+	[RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
+	[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info	 },
+	[RTM_SETNEIGHTBL - RTM_BASE] = { .doit   = neightbl_set		 },
+};
+
+static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	switch (event) {
+	case NETDEV_UNREGISTER:
+		rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+		break;
+	case NETDEV_REGISTER:
+		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+		break;
+	case NETDEV_UP:
+	case NETDEV_DOWN:
+		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+		break;
+	case NETDEV_CHANGE:
+	case NETDEV_GOING_DOWN:
+		break;
+	default:
+		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block rtnetlink_dev_notifier = {
+	.notifier_call	= rtnetlink_event,
+};
+
+void __init rtnetlink_init(void)
+{
+	int i;
+
+	rtattr_max = 0;
+	for (i = 0; i < ARRAY_SIZE(rta_max); i++)
+		if (rta_max[i] > rtattr_max)
+			rtattr_max = rta_max[i];
+	rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
+	if (!rta_buf)
+		panic("rtnetlink_init: cannot allocate rta_buf\n");
+
+	rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
+				     THIS_MODULE);
+	if (rtnl == NULL)
+		panic("rtnetlink_init: cannot initialize rtnetlink\n");
+	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
+	register_netdevice_notifier(&rtnetlink_dev_notifier);
+	rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
+	rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
+}
+
+EXPORT_SYMBOL(__rta_fill);
+EXPORT_SYMBOL(rtattr_strlcpy);
+EXPORT_SYMBOL(rtattr_parse);
+EXPORT_SYMBOL(rtnetlink_links);
+EXPORT_SYMBOL(rtnetlink_put_metrics);
+EXPORT_SYMBOL(rtnl_lock);
+EXPORT_SYMBOL(rtnl_trylock);
+EXPORT_SYMBOL(rtnl_unlock);
+EXPORT_SYMBOL(rtnl_unicast);
+EXPORT_SYMBOL(rtnl_notify);
+EXPORT_SYMBOL(rtnl_set_sk_err);
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -0,0 +1,290 @@
+/* scm.c - Socket level control messages processing.
+ *
+ * Author:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *              Alignment and value checking mods by Craig Metz
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/stat.h>
+#include <linux/socket.h>
+#include <linux/file.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/security.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <net/protocol.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/compat.h>
+#include <net/scm.h>
+
+
+/*
+ *	Only allow a user to send credentials, that they could set with
+ *	setu(g)id.
+ */
+
+static __inline__ int scm_check_creds(struct ucred *creds)
+{
+	if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) &&
+	    ((creds->uid == current->uid || creds->uid == current->euid ||
+	      creds->uid == current->suid) || capable(CAP_SETUID)) &&
+	    ((creds->gid == current->gid || creds->gid == current->egid ||
+	      creds->gid == current->sgid) || capable(CAP_SETGID))) {
+	       return 0;
+	}
+	return -EPERM;
+}
+
+static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
+{
+	int *fdp = (int*)CMSG_DATA(cmsg);
+	struct scm_fp_list *fpl = *fplp;
+	struct file **fpp;
+	int i, num;
+
+	num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
+
+	if (num <= 0)
+		return 0;
+
+	if (num > SCM_MAX_FD)
+		return -EINVAL;
+
+	if (!fpl)
+	{
+		fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+		if (!fpl)
+			return -ENOMEM;
+		*fplp = fpl;
+		fpl->count = 0;
+	}
+	fpp = &fpl->fp[fpl->count];
+
+	if (fpl->count + num > SCM_MAX_FD)
+		return -EINVAL;
+
+	/*
+	 *	Verify the descriptors and increment the usage count.
+	 */
+
+	for (i=0; i< num; i++)
+	{
+		int fd = fdp[i];
+		struct file *file;
+
+		if (fd < 0 || !(file = fget(fd)))
+			return -EBADF;
+		*fpp++ = file;
+		fpl->count++;
+	}
+	return num;
+}
+
+void __scm_destroy(struct scm_cookie *scm)
+{
+	struct scm_fp_list *fpl = scm->fp;
+	int i;
+
+	if (fpl) {
+		scm->fp = NULL;
+		for (i=fpl->count-1; i>=0; i--)
+			fput(fpl->fp[i]);
+		kfree(fpl);
+	}
+}
+
+int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
+{
+	struct cmsghdr *cmsg;
+	int err;
+
+	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg))
+	{
+		err = -EINVAL;
+
+		/* Verify that cmsg_len is at least sizeof(struct cmsghdr) */
+		/* The first check was omitted in <= 2.2.5. The reasoning was
+		   that parser checks cmsg_len in any case, so that
+		   additional check would be work duplication.
+		   But if cmsg_level is not SOL_SOCKET, we do not check
+		   for too short ancillary data object at all! Oops.
+		   OK, let's add it...
+		 */
+		if (!CMSG_OK(msg, cmsg))
+			goto error;
+
+		if (cmsg->cmsg_level != SOL_SOCKET)
+			continue;
+
+		switch (cmsg->cmsg_type)
+		{
+		case SCM_RIGHTS:
+			err=scm_fp_copy(cmsg, &p->fp);
+			if (err<0)
+				goto error;
+			break;
+		case SCM_CREDENTIALS:
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
+				goto error;
+			memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
+			err = scm_check_creds(&p->creds);
+			if (err)
+				goto error;
+			break;
+		default:
+			goto error;
+		}
+	}
+
+	if (p->fp && !p->fp->count)
+	{
+		kfree(p->fp);
+		p->fp = NULL;
+	}
+	return 0;
+
+error:
+	scm_destroy(p);
+	return err;
+}
+
+int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
+{
+	struct cmsghdr __user *cm = (struct cmsghdr __user *)msg->msg_control;
+	struct cmsghdr cmhdr;
+	int cmlen = CMSG_LEN(len);
+	int err;
+
+	if (MSG_CMSG_COMPAT & msg->msg_flags)
+		return put_cmsg_compat(msg, level, type, len, data);
+
+	if (cm==NULL || msg->msg_controllen < sizeof(*cm)) {
+		msg->msg_flags |= MSG_CTRUNC;
+		return 0; /* XXX: return error? check spec. */
+	}
+	if (msg->msg_controllen < cmlen) {
+		msg->msg_flags |= MSG_CTRUNC;
+		cmlen = msg->msg_controllen;
+	}
+	cmhdr.cmsg_level = level;
+	cmhdr.cmsg_type = type;
+	cmhdr.cmsg_len = cmlen;
+
+	err = -EFAULT;
+	if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
+		goto out;
+	if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
+		goto out;
+	cmlen = CMSG_SPACE(len);
+	msg->msg_control += cmlen;
+	msg->msg_controllen -= cmlen;
+	err = 0;
+out:
+	return err;
+}
+
+void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
+{
+	struct cmsghdr __user *cm = (struct cmsghdr __user*)msg->msg_control;
+
+	int fdmax = 0;
+	int fdnum = scm->fp->count;
+	struct file **fp = scm->fp->fp;
+	int __user *cmfptr;
+	int err = 0, i;
+
+	if (MSG_CMSG_COMPAT & msg->msg_flags) {
+		scm_detach_fds_compat(msg, scm);
+		return;
+	}
+
+	if (msg->msg_controllen > sizeof(struct cmsghdr))
+		fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr))
+			 / sizeof(int));
+
+	if (fdnum < fdmax)
+		fdmax = fdnum;
+
+	for (i=0, cmfptr=(int __user *)CMSG_DATA(cm); i<fdmax; i++, cmfptr++)
+	{
+		int new_fd;
+		err = security_file_receive(fp[i]);
+		if (err)
+			break;
+		err = get_unused_fd();
+		if (err < 0)
+			break;
+		new_fd = err;
+		err = put_user(new_fd, cmfptr);
+		if (err) {
+			put_unused_fd(new_fd);
+			break;
+		}
+		/* Bump the usage count and install the file. */
+		get_file(fp[i]);
+		fd_install(new_fd, fp[i]);
+	}
+
+	if (i > 0)
+	{
+		int cmlen = CMSG_LEN(i*sizeof(int));
+		err = put_user(SOL_SOCKET, &cm->cmsg_level);
+		if (!err)
+			err = put_user(SCM_RIGHTS, &cm->cmsg_type);
+		if (!err)
+			err = put_user(cmlen, &cm->cmsg_len);
+		if (!err) {
+			cmlen = CMSG_SPACE(i*sizeof(int));
+			msg->msg_control += cmlen;
+			msg->msg_controllen -= cmlen;
+		}
+	}
+	if (i < fdnum || (fdnum && fdmax <= 0))
+		msg->msg_flags |= MSG_CTRUNC;
+
+	/*
+	 * All of the files that fit in the message have had their
+	 * usage counts incremented, so we just free the list.
+	 */
+	__scm_destroy(scm);
+}
+
+struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
+{
+	struct scm_fp_list *new_fpl;
+	int i;
+
+	if (!fpl)
+		return NULL;
+
+	new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
+	if (new_fpl) {
+		for (i=fpl->count-1; i>=0; i--)
+			get_file(fpl->fp[i]);
+		memcpy(new_fpl, fpl, sizeof(*fpl));
+	}
+	return new_fpl;
+}
+
+EXPORT_SYMBOL(__scm_destroy);
+EXPORT_SYMBOL(__scm_send);
+EXPORT_SYMBOL(put_cmsg);
+EXPORT_SYMBOL(scm_detach_fds);
+EXPORT_SYMBOL(scm_fp_dup);
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
--- a/net/core/sock.c
+++ b/net/core/sock.c
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -0,0 +1,290 @@
+/*
+ *     SUCS NET3:
+ *
+ *     Generic stream handling routines. These are generic for most
+ *     protocols. Even IP. Tonight 8-).
+ *     This is used because TCP, LLC (others too) layer all have mostly
+ *     identical sendmsg() and recvmsg() code.
+ *     So we (will) share it here.
+ *
+ *     Authors:        Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *                     (from old tcp.c code)
+ *                     Alan Cox <alan@redhat.com> (Borrowed comments 8-))
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/signal.h>
+#include <linux/tcp.h>
+#include <linux/wait.h>
+#include <net/sock.h>
+
+/**
+ * sk_stream_write_space - stream socket write_space callback.
+ * @sk: socket
+ *
+ * FIXME: write proper description
+ */
+void sk_stream_write_space(struct sock *sk)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
+		clear_bit(SOCK_NOSPACE, &sock->flags);
+
+		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+			wake_up_interruptible(sk->sk_sleep);
+		if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
+			sock_wake_async(sock, 2, POLL_OUT);
+	}
+}
+
+EXPORT_SYMBOL(sk_stream_write_space);
+
+/**
+ * sk_stream_wait_connect - Wait for a socket to get into the connected state
+ * @sk: sock to wait on
+ * @timeo_p: for how long to wait
+ *
+ * Must be called with the socket locked.
+ */
+int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
+{
+	struct task_struct *tsk = current;
+	DEFINE_WAIT(wait);
+	int done;
+
+	do {
+		int err = sock_error(sk);
+		if (err)
+			return err;
+		if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
+			return -EPIPE;
+		if (!*timeo_p)
+			return -EAGAIN;
+		if (signal_pending(tsk))
+			return sock_intr_errno(*timeo_p);
+
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		sk->sk_write_pending++;
+		done = sk_wait_event(sk, timeo_p,
+				     !sk->sk_err &&
+				     !((1 << sk->sk_state) &
+				       ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
+		finish_wait(sk->sk_sleep, &wait);
+		sk->sk_write_pending--;
+	} while (!done);
+	return 0;
+}
+
+EXPORT_SYMBOL(sk_stream_wait_connect);
+
+/**
+ * sk_stream_closing - Return 1 if we still have things to send in our buffers.
+ * @sk: socket to verify
+ */
+static inline int sk_stream_closing(struct sock *sk)
+{
+	return (1 << sk->sk_state) &
+	       (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
+}
+
+void sk_stream_wait_close(struct sock *sk, long timeout)
+{
+	if (timeout) {
+		DEFINE_WAIT(wait);
+
+		do {
+			prepare_to_wait(sk->sk_sleep, &wait,
+					TASK_INTERRUPTIBLE);
+			if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
+				break;
+		} while (!signal_pending(current) && timeout);
+
+		finish_wait(sk->sk_sleep, &wait);
+	}
+}
+
+EXPORT_SYMBOL(sk_stream_wait_close);
+
+/**
+ * sk_stream_wait_memory - Wait for more memory for a socket
+ * @sk: socket to wait for memory
+ * @timeo_p: for how long
+ */
+int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
+{
+	int err = 0;
+	long vm_wait = 0;
+	long current_timeo = *timeo_p;
+	DEFINE_WAIT(wait);
+
+	if (sk_stream_memory_free(sk))
+		current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;
+
+	while (1) {
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
+		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+			goto do_error;
+		if (!*timeo_p)
+			goto do_nonblock;
+		if (signal_pending(current))
+			goto do_interrupted;
+		clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		if (sk_stream_memory_free(sk) && !vm_wait)
+			break;
+
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		sk->sk_write_pending++;
+		sk_wait_event(sk, &current_timeo, !sk->sk_err &&
+						  !(sk->sk_shutdown & SEND_SHUTDOWN) &&
+						  sk_stream_memory_free(sk) &&
+						  vm_wait);
+		sk->sk_write_pending--;
+
+		if (vm_wait) {
+			vm_wait -= current_timeo;
+			current_timeo = *timeo_p;
+			if (current_timeo != MAX_SCHEDULE_TIMEOUT &&
+			    (current_timeo -= vm_wait) < 0)
+				current_timeo = 0;
+			vm_wait = 0;
+		}
+		*timeo_p = current_timeo;
+	}
+out:
+	finish_wait(sk->sk_sleep, &wait);
+	return err;
+
+do_error:
+	err = -EPIPE;
+	goto out;
+do_nonblock:
+	err = -EAGAIN;
+	goto out;
+do_interrupted:
+	err = sock_intr_errno(*timeo_p);
+	goto out;
+}
+
+EXPORT_SYMBOL(sk_stream_wait_memory);
+
+void sk_stream_rfree(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	skb_truesize_check(skb);
+	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+	sk->sk_forward_alloc += skb->truesize;
+}
+
+EXPORT_SYMBOL(sk_stream_rfree);
+
+int sk_stream_error(struct sock *sk, int flags, int err)
+{
+	if (err == -EPIPE)
+		err = sock_error(sk) ? : -EPIPE;
+	if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
+		send_sig(SIGPIPE, current, 0);
+	return err;
+}
+
+EXPORT_SYMBOL(sk_stream_error);
+
+void __sk_stream_mem_reclaim(struct sock *sk)
+{
+	atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
+		   sk->sk_prot->memory_allocated);
+	sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
+	if (*sk->sk_prot->memory_pressure &&
+	    (atomic_read(sk->sk_prot->memory_allocated) <
+	     sk->sk_prot->sysctl_mem[0]))
+		*sk->sk_prot->memory_pressure = 0;
+}
+
+EXPORT_SYMBOL(__sk_stream_mem_reclaim);
+
+int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
+{
+	int amt = sk_stream_pages(size);
+
+	sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
+	atomic_add(amt, sk->sk_prot->memory_allocated);
+
+	/* Under limit. */
+	if (atomic_read(sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
+		if (*sk->sk_prot->memory_pressure)
+			*sk->sk_prot->memory_pressure = 0;
+		return 1;
+	}
+
+	/* Over hard limit. */
+	if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
+		sk->sk_prot->enter_memory_pressure();
+		goto suppress_allocation;
+	}
+
+	/* Under pressure. */
+	if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
+		sk->sk_prot->enter_memory_pressure();
+
+	if (kind) {
+		if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
+			return 1;
+	} else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
+		return 1;
+
+	if (!*sk->sk_prot->memory_pressure ||
+	    sk->sk_prot->sysctl_mem[2] > atomic_read(sk->sk_prot->sockets_allocated) *
+				sk_stream_pages(sk->sk_wmem_queued +
+						atomic_read(&sk->sk_rmem_alloc) +
+						sk->sk_forward_alloc))
+		return 1;
+
+suppress_allocation:
+
+	if (!kind) {
+		sk_stream_moderate_sndbuf(sk);
+
+		/* Fail only if socket is _under_ its sndbuf.
+		 * In this case we cannot block, so that we have to fail.
+		 */
+		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
+			return 1;
+	}
+
+	/* Alas. Undo changes. */
+	sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
+	atomic_sub(amt, sk->sk_prot->memory_allocated);
+	return 0;
+}
+
+EXPORT_SYMBOL(sk_stream_mem_schedule);
+
+void sk_stream_kill_queues(struct sock *sk)
+{
+	/* First the read buffer. */
+	__skb_queue_purge(&sk->sk_receive_queue);
+
+	/* Next, the error queue. */
+	__skb_queue_purge(&sk->sk_error_queue);
+
+	/* Next, the write queue. */
+	BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
+
+	/* Account for returned memory. */
+	sk_stream_mem_reclaim(sk);
+
+	BUG_TRAP(!sk->sk_wmem_queued);
+	BUG_TRAP(!sk->sk_forward_alloc);
+
+	/* It is _impossible_ for the backlog to contain anything
+	 * when we get here.  All user references to this socket
+	 * have gone away, only the net layer knows can touch it.
+	 */
+}
+
+EXPORT_SYMBOL(sk_stream_kill_queues);
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -0,0 +1,142 @@
+/* -*- linux-c -*-
+ * sysctl_net_core.c: sysctl interface to net core subsystem.
+ *
+ * Begun April 1, 1996, Mike Shaver.
+ * Added /proc/sys/net/core directory entry (empty =) ). [MS]
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <net/sock.h>
+
+#ifdef CONFIG_SYSCTL
+
+extern int netdev_max_backlog;
+extern int weight_p;
+
+extern __u32 sysctl_wmem_max;
+extern __u32 sysctl_rmem_max;
+
+extern int sysctl_core_destroy_delay;
+
+#ifdef CONFIG_XFRM
+extern u32 sysctl_xfrm_aevent_etime;
+extern u32 sysctl_xfrm_aevent_rseqth;
+#endif
+
+ctl_table core_table[] = {
+#ifdef CONFIG_NET
+	{
+		.ctl_name	= NET_CORE_WMEM_MAX,
+		.procname	= "wmem_max",
+		.data		= &sysctl_wmem_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_RMEM_MAX,
+		.procname	= "rmem_max",
+		.data		= &sysctl_rmem_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_WMEM_DEFAULT,
+		.procname	= "wmem_default",
+		.data		= &sysctl_wmem_default,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_RMEM_DEFAULT,
+		.procname	= "rmem_default",
+		.data		= &sysctl_rmem_default,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_DEV_WEIGHT,
+		.procname	= "dev_weight",
+		.data		= &weight_p,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_MAX_BACKLOG,
+		.procname	= "netdev_max_backlog",
+		.data		= &netdev_max_backlog,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_MSG_COST,
+		.procname	= "message_cost",
+		.data		= &net_msg_cost,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+		.strategy	= &sysctl_jiffies,
+	},
+	{
+		.ctl_name	= NET_CORE_MSG_BURST,
+		.procname	= "message_burst",
+		.data		= &net_msg_burst,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_CORE_OPTMEM_MAX,
+		.procname	= "optmem_max",
+		.data		= &sysctl_optmem_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+#ifdef CONFIG_XFRM
+	{
+		.ctl_name	= NET_CORE_AEVENT_ETIME,
+		.procname	= "xfrm_aevent_etime",
+		.data		= &sysctl_xfrm_aevent_etime,
+		.maxlen		= sizeof(u32),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_AEVENT_RSEQTH,
+		.procname	= "xfrm_aevent_rseqth",
+		.data		= &sysctl_xfrm_aevent_rseqth,
+		.maxlen		= sizeof(u32),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+#endif /* CONFIG_XFRM */
+#endif /* CONFIG_NET */
+	{
+		.ctl_name	= NET_CORE_SOMAXCONN,
+		.procname	= "somaxconn",
+		.data		= &sysctl_somaxconn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
+		.ctl_name	= NET_CORE_BUDGET,
+		.procname	= "netdev_budget",
+		.data		= &netdev_budget,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{ .ctl_name = 0 }
+};
+
+#endif
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ * Portions based on net/core/datagram.c and copyrighted by their authors.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code allows the net stack to make use of a DMA engine for
+ * skb to iovec copies.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/socket.h>
+#include <linux/rtnetlink.h> /* for BUG_TRAP */
+#include <net/tcp.h>
+#include <net/netdma.h>
+
+#define NET_DMA_DEFAULT_COPYBREAK 4096
+
+int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
+
+/**
+ *	dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.
+ *	@skb - buffer to copy
+ *	@offset - offset in the buffer to start copying from
+ *	@iovec - io vector to copy to
+ *	@len - amount of data to copy from buffer to iovec
+ *	@pinned_list - locked iovec buffer data
+ *
+ *	Note: the iovec is modified during the copy.
+ */
+int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
+			struct sk_buff *skb, int offset, struct iovec *to,
+			size_t len, struct dma_pinned_list *pinned_list)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	dma_cookie_t cookie = 0;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		cookie = dma_memcpy_to_iovec(chan, to, pinned_list,
+					    skb->data + offset, copy);
+		if (cookie < 0)
+			goto fault;
+		len -= copy;
+		if (len == 0)
+			goto end;
+		offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		copy = end - offset;
+		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+
+			cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page,
+					frag->page_offset + offset - start, copy);
+			if (cookie < 0)
+				goto fault;
+			len -= copy;
+			if (len == 0)
+				goto end;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			copy = end - offset;
+			if (copy > 0) {
+				if (copy > len)
+					copy = len;
+				cookie = dma_skb_copy_datagram_iovec(chan, list,
+						offset - start, to, copy,
+						pinned_list);
+				if (cookie < 0)
+					goto fault;
+				len -= copy;
+				if (len == 0)
+					goto end;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+
+end:
+	if (!len) {
+		skb->dma_cookie = cookie;
+		return cookie;
+	}
+
+fault:
+	return -EFAULT;
+}
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -0,0 +1,292 @@
+/*
+ *	Generic address resultion entity
+ *
+ *	Authors:
+ *	net_random Alan Cox
+ *	net_ratelimit Andi Kleen
+ *	in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project
+ *
+ *	Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/inet.h>
+#include <linux/mm.h>
+#include <linux/net.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+
+#include <asm/byteorder.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+int net_msg_cost = 5*HZ;
+int net_msg_burst = 10;
+
+/*
+ * All net warning printk()s should be guarded by this function.
+ */
+int net_ratelimit(void)
+{
+	return __printk_ratelimit(net_msg_cost, net_msg_burst);
+}
+EXPORT_SYMBOL(net_ratelimit);
+
+/*
+ * Convert an ASCII string to binary IP.
+ * This is outside of net/ipv4/ because various code that uses IP addresses
+ * is otherwise not dependent on the TCP/IP stack.
+ */
+
+__be32 in_aton(const char *str)
+{
+	unsigned long l;
+	unsigned int val;
+	int i;
+
+	l = 0;
+	for (i = 0; i < 4; i++)
+	{
+		l <<= 8;
+		if (*str != '\0')
+		{
+			val = 0;
+			while (*str != '\0' && *str != '.' && *str != '\n')
+			{
+				val *= 10;
+				val += *str - '0';
+				str++;
+			}
+			l |= val;
+			if (*str != '\0')
+				str++;
+		}
+	}
+	return(htonl(l));
+}
+
+EXPORT_SYMBOL(in_aton);
+
+#define IN6PTON_XDIGIT		0x00010000
+#define IN6PTON_DIGIT		0x00020000
+#define IN6PTON_COLON_MASK	0x00700000
+#define IN6PTON_COLON_1		0x00100000	/* single : requested */
+#define IN6PTON_COLON_2		0x00200000	/* second : requested */
+#define IN6PTON_COLON_1_2	0x00400000	/* :: requested */
+#define IN6PTON_DOT		0x00800000	/* . */
+#define IN6PTON_DELIM		0x10000000
+#define IN6PTON_NULL		0x20000000	/* first/tail */
+#define IN6PTON_UNKNOWN		0x40000000
+
+static inline int digit2bin(char c, int delim)
+{
+	if (c == delim || c == '\0')
+		return IN6PTON_DELIM;
+	if (c == '.')
+		return IN6PTON_DOT;
+	if (c >= '0' && c <= '9')
+		return (IN6PTON_DIGIT | (c - '0'));
+	return IN6PTON_UNKNOWN;
+}
+
+static inline int xdigit2bin(char c, int delim)
+{
+	if (c == delim || c == '\0')
+		return IN6PTON_DELIM;
+	if (c == ':')
+		return IN6PTON_COLON_MASK;
+	if (c == '.')
+		return IN6PTON_DOT;
+	if (c >= '0' && c <= '9')
+		return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0'));
+	if (c >= 'a' && c <= 'f')
+		return (IN6PTON_XDIGIT | (c - 'a' + 10));
+	if (c >= 'A' && c <= 'F')
+		return (IN6PTON_XDIGIT | (c - 'A' + 10));
+	if (delim == -1)
+		return IN6PTON_DELIM;
+	return IN6PTON_UNKNOWN;
+}
+
+int in4_pton(const char *src, int srclen,
+	     u8 *dst,
+	     int delim, const char **end)
+{
+	const char *s;
+	u8 *d;
+	u8 dbuf[4];
+	int ret = 0;
+	int i;
+	int w = 0;
+
+	if (srclen < 0)
+		srclen = strlen(src);
+	s = src;
+	d = dbuf;
+	i = 0;
+	while(1) {
+		int c;
+		c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
+		if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK))) {
+			goto out;
+		}
+		if (c & (IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
+			if (w == 0)
+				goto out;
+			*d++ = w & 0xff;
+			w = 0;
+			i++;
+			if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
+				if (i != 4)
+					goto out;
+				break;
+			}
+			goto cont;
+		}
+		w = (w * 10) + c;
+		if ((w & 0xffff) > 255) {
+			goto out;
+		}
+cont:
+		if (i >= 4)
+			goto out;
+		s++;
+		srclen--;
+	}
+	ret = 1;
+	memcpy(dst, dbuf, sizeof(dbuf));
+out:
+	if (end)
+		*end = s;
+	return ret;
+}
+
+EXPORT_SYMBOL(in4_pton);
+
+int in6_pton(const char *src, int srclen,
+	     u8 *dst,
+	     int delim, const char **end)
+{
+	const char *s, *tok = NULL;
+	u8 *d, *dc = NULL;
+	u8 dbuf[16];
+	int ret = 0;
+	int i;
+	int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL;
+	int w = 0;
+
+	memset(dbuf, 0, sizeof(dbuf));
+
+	s = src;
+	d = dbuf;
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	while (1) {
+		int c;
+
+		c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
+		if (!(c & state))
+			goto out;
+		if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
+			/* process one 16-bit word */
+			if (!(state & IN6PTON_NULL)) {
+				*d++ = (w >> 8) & 0xff;
+				*d++ = w & 0xff;
+			}
+			w = 0;
+			if (c & IN6PTON_DELIM) {
+				/* We've processed last word */
+				break;
+			}
+			/*
+			 * COLON_1 => XDIGIT
+			 * COLON_2 => XDIGIT|DELIM
+			 * COLON_1_2 => COLON_2
+			 */
+			switch (state & IN6PTON_COLON_MASK) {
+			case IN6PTON_COLON_2:
+				dc = d;
+				state = IN6PTON_XDIGIT | IN6PTON_DELIM;
+				if (dc - dbuf >= sizeof(dbuf))
+					state |= IN6PTON_NULL;
+				break;
+			case IN6PTON_COLON_1|IN6PTON_COLON_1_2:
+				state = IN6PTON_XDIGIT | IN6PTON_COLON_2;
+				break;
+			case IN6PTON_COLON_1:
+				state = IN6PTON_XDIGIT;
+				break;
+			case IN6PTON_COLON_1_2:
+				state = IN6PTON_COLON_2;
+				break;
+			default:
+				state = 0;
+			}
+			tok = s + 1;
+			goto cont;
+		}
+
+		if (c & IN6PTON_DOT) {
+			ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s);
+			if (ret > 0) {
+				d += 4;
+				break;
+			}
+			goto out;
+		}
+
+		w = (w << 4) | (0xff & c);
+		state = IN6PTON_COLON_1 | IN6PTON_DELIM;
+		if (!(w & 0xf000)) {
+			state |= IN6PTON_XDIGIT;
+		}
+		if (!dc && d + 2 < dbuf + sizeof(dbuf)) {
+			state |= IN6PTON_COLON_1_2;
+			state &= ~IN6PTON_DELIM;
+		}
+		if (d + 2 >= dbuf + sizeof(dbuf)) {
+			state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2);
+		}
+cont:
+		if ((dc && d + 4 < dbuf + sizeof(dbuf)) ||
+		    d + 4 == dbuf + sizeof(dbuf)) {
+			state |= IN6PTON_DOT;
+		}
+		if (d >= dbuf + sizeof(dbuf)) {
+			state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK);
+		}
+		s++;
+		srclen--;
+	}
+
+	i = 15; d--;
+
+	if (dc) {
+		while(d >= dc)
+			dst[i--] = *d--;
+		while(i >= dc - dbuf)
+			dst[i--] = 0;
+		while(i >= 0)
+			dst[i--] = *d--;
+	} else
+		memcpy(dst, dbuf, sizeof(dbuf));
+
+	ret = 1;
+out:
+	if (end)
+		*end = s;
+	return ret;
+}
+
+EXPORT_SYMBOL(in6_pton);
--- a/net/core/wireless.c
+++ b/net/core/wireless.c