Creation of Cybook 2416 (actually Gen4) repository

This commit is contained in:
mlt
2009-12-18 17:10:00 +00:00
committed by godzil
commit 76f20f4d40
13791 changed files with 6812321 additions and 0 deletions

19
net/core/Makefile Normal file
View File

@@ -0,0 +1,19 @@
#
# Makefile for the Linux networking core.
#
obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
gen_stats.o gen_estimator.o
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o
obj-$(CONFIG_XFRM) += flow.o
obj-$(CONFIG_SYSFS) += net-sysfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_WIRELESS_EXT) += wireless.o
obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_NET_DMA) += user_dma.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o

535
net/core/datagram.c Normal file
View File

@@ -0,0 +1,535 @@
/*
* SUCS NET3:
*
* Generic datagram handling routines. These are generic for all
* protocols. Possibly a generic IP version on top of these would
* make sense. Not tonight however 8-).
* This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
* NetROM layer all have identical poll code and mostly
* identical recvmsg() code. So we share it here. The poll was
* shared before but buried in udp.c so I moved it.
*
* Authors: Alan Cox <alan@redhat.com>. (datagram_poll() from old
* udp.c code)
*
* Fixes:
* Alan Cox : NULL return from skb_peek_copy()
* understood
* Alan Cox : Rewrote skb_read_datagram to avoid the
* skb_peek_copy stuff.
* Alan Cox : Added support for SOCK_SEQPACKET.
* IPX can no longer use the SO_TYPE hack
* but AX.25 now works right, and SPX is
* feasible.
* Alan Cox : Fixed write poll of non IP protocol
* crash.
* Florian La Roche: Changed for my new skbuff handling.
* Darryl Miles : Fixed non-blocking SOCK_SEQPACKET.
* Linus Torvalds : BSD semantic fixes.
* Alan Cox : Datagram iovec handling
* Darryl Miles : Fixed non-blocking SOCK_STREAM.
* Alan Cox : POSIXisms
* Pete Wyckoff : Unconnected accept() fix.
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
/*
* Is a socket 'connection oriented' ?
*/
static inline int connection_based(struct sock *sk)
{
return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}
/*
* Wait for a packet..
*/
static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
{
int error;
DEFINE_WAIT(wait);
prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
/* Socket errors? */
error = sock_error(sk);
if (error)
goto out_err;
if (!skb_queue_empty(&sk->sk_receive_queue))
goto out;
/* Socket shut down? */
if (sk->sk_shutdown & RCV_SHUTDOWN)
goto out_noerr;
/* Sequenced packets can come disconnected.
* If so we report the problem
*/
error = -ENOTCONN;
if (connection_based(sk) &&
!(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
goto out_err;
/* handle signals */
if (signal_pending(current))
goto interrupted;
error = 0;
*timeo_p = schedule_timeout(*timeo_p);
out:
finish_wait(sk->sk_sleep, &wait);
return error;
interrupted:
error = sock_intr_errno(*timeo_p);
out_err:
*err = error;
goto out;
out_noerr:
*err = 0;
error = 1;
goto out;
}
/**
* skb_recv_datagram - Receive a datagram skbuff
* @sk: socket
* @flags: MSG_ flags
* @noblock: blocking operation?
* @err: error code returned
*
* Get a datagram skbuff, understands the peeking, nonblocking wakeups
* and possible races. This replaces identical code in packet, raw and
* udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
* the long standing peek and read race for datagram sockets. If you
* alter this routine remember it must be re-entrant.
*
* This function will lock the socket if a skb is returned, so the caller
* needs to unlock the socket in that case (usually by calling
* skb_free_datagram)
*
* * It does not lock socket since today. This function is
* * free of race conditions. This measure should/can improve
* * significantly datagram socket latencies at high loads,
* * when data copying to user space takes lots of time.
* * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
* * 8) Great win.)
* * --ANK (980729)
*
* The order of the tests when we find no data waiting are specified
* quite explicitly by POSIX 1003.1g, don't change them without having
* the standard around please.
*/
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
int noblock, int *err)
{
struct sk_buff *skb;
long timeo;
/*
* Caller is allowed not to check sk->sk_err before skb_recv_datagram()
*/
int error = sock_error(sk);
if (error)
goto no_packet;
timeo = sock_rcvtimeo(sk, noblock);
do {
/* Again only user level code calls this function, so nothing
* interrupt level will suddenly eat the receive_queue.
*
* Look at current nfs client by the way...
* However, this function was corrent in any case. 8)
*/
if (flags & MSG_PEEK) {
unsigned long cpu_flags;
spin_lock_irqsave(&sk->sk_receive_queue.lock,
cpu_flags);
skb = skb_peek(&sk->sk_receive_queue);
if (skb)
atomic_inc(&skb->users);
spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
cpu_flags);
} else
skb = skb_dequeue(&sk->sk_receive_queue);
if (skb)
return skb;
/* User doesn't want to wait */
error = -EAGAIN;
if (!timeo)
goto no_packet;
} while (!wait_for_packet(sk, err, &timeo));
return NULL;
no_packet:
*err = error;
return NULL;
}
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
kfree_skb(skb);
}
/**
* skb_kill_datagram - Free a datagram skbuff forcibly
* @sk: socket
* @skb: datagram skbuff
* @flags: MSG_ flags
*
* This function frees a datagram skbuff that was received by
* skb_recv_datagram. The flags argument must match the one
* used for skb_recv_datagram.
*
* If the MSG_PEEK flag is set, and the packet is still on the
* receive queue of the socket, it will be taken off the queue
* before it is freed.
*
* This function currently only disables BH when acquiring the
* sk_receive_queue lock. Therefore it must not be used in a
* context where that lock is acquired in an IRQ context.
*/
void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
if (flags & MSG_PEEK) {
spin_lock_bh(&sk->sk_receive_queue.lock);
if (skb == skb_peek(&sk->sk_receive_queue)) {
__skb_unlink(skb, &sk->sk_receive_queue);
atomic_dec(&skb->users);
}
spin_unlock_bh(&sk->sk_receive_queue.lock);
}
kfree_skb(skb);
}
EXPORT_SYMBOL(skb_kill_datagram);
/**
* skb_copy_datagram_iovec - Copy a datagram to an iovec.
* @skb: buffer to copy
* @offset: offset in the buffer to start copying from
* @to: io vector to copy to
* @len: amount of data to copy from buffer to iovec
*
* Note: the iovec is modified during the copy.
*/
int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
struct iovec *to, int len)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
/* Copy header. */
if (copy > 0) {
if (copy > len)
copy = len;
if (memcpy_toiovec(to, skb->data + offset, copy))
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
}
/* Copy paged appendix. Hmm... why does this look so complicated? */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
BUG_TRAP(start <= offset + len);
end = start + skb_shinfo(skb)->frags[i].size;
if ((copy = end - offset) > 0) {
int err;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
struct page *page = frag->page;
if (copy > len)
copy = len;
vaddr = kmap(page);
err = memcpy_toiovec(to, vaddr + frag->page_offset +
offset - start, copy);
kunmap(page);
if (err)
goto fault;
if (!(len -= copy))
return 0;
offset += copy;
}
start = end;
}
if (skb_shinfo(skb)->frag_list) {
struct sk_buff *list = skb_shinfo(skb)->frag_list;
for (; list; list = list->next) {
int end;
BUG_TRAP(start <= offset + len);
end = start + list->len;
if ((copy = end - offset) > 0) {
if (copy > len)
copy = len;
if (skb_copy_datagram_iovec(list,
offset - start,
to, copy))
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
}
start = end;
}
}
if (!len)
return 0;
fault:
return -EFAULT;
}
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
u8 __user *to, int len,
__wsum *csump)
{
int start = skb_headlen(skb);
int pos = 0;
int i, copy = start - offset;
/* Copy header. */
if (copy > 0) {
int err = 0;
if (copy > len)
copy = len;
*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
*csump, &err);
if (err)
goto fault;
if ((len -= copy) == 0)
return 0;
offset += copy;
to += copy;
pos = copy;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
BUG_TRAP(start <= offset + len);
end = start + skb_shinfo(skb)->frags[i].size;
if ((copy = end - offset) > 0) {
__wsum csum2;
int err = 0;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
struct page *page = frag->page;
if (copy > len)
copy = len;
vaddr = kmap(page);
csum2 = csum_and_copy_to_user(vaddr +
frag->page_offset +
offset - start,
to, copy, 0, &err);
kunmap(page);
if (err)
goto fault;
*csump = csum_block_add(*csump, csum2, pos);
if (!(len -= copy))
return 0;
offset += copy;
to += copy;
pos += copy;
}
start = end;
}
if (skb_shinfo(skb)->frag_list) {
struct sk_buff *list = skb_shinfo(skb)->frag_list;
for (; list; list=list->next) {
int end;
BUG_TRAP(start <= offset + len);
end = start + list->len;
if ((copy = end - offset) > 0) {
__wsum csum2 = 0;
if (copy > len)
copy = len;
if (skb_copy_and_csum_datagram(list,
offset - start,
to, copy,
&csum2))
goto fault;
*csump = csum_block_add(*csump, csum2, pos);
if ((len -= copy) == 0)
return 0;
offset += copy;
to += copy;
pos += copy;
}
start = end;
}
}
if (!len)
return 0;
fault:
return -EFAULT;
}
__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
__sum16 sum;
sum = csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
if (likely(!sum)) {
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
netdev_rx_csum_fault(skb->dev);
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete);
/**
* skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec.
* @skb: skbuff
* @hlen: hardware length
* @iov: io vector
*
* Caller _must_ check that skb will fit to this iovec.
*
* Returns: 0 - success.
* -EINVAL - checksum failure.
* -EFAULT - fault during copy. Beware, in this case iovec
* can be modified!
*/
int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
int hlen, struct iovec *iov)
{
__wsum csum;
int chunk = skb->len - hlen;
/* Skip filled elements.
* Pretty silly, look at memcpy_toiovec, though 8)
*/
while (!iov->iov_len)
iov++;
if (iov->iov_len < chunk) {
if (__skb_checksum_complete(skb))
goto csum_error;
if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
goto fault;
} else {
csum = csum_partial(skb->data, hlen, skb->csum);
if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
chunk, &csum))
goto fault;
if (csum_fold(csum))
goto csum_error;
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
netdev_rx_csum_fault(skb->dev);
iov->iov_len -= chunk;
iov->iov_base += chunk;
}
return 0;
csum_error:
return -EINVAL;
fault:
return -EFAULT;
}
/**
* datagram_poll - generic datagram poll
* @file: file struct
* @sock: socket
* @wait: poll table
*
* Datagram poll: Again totally generic. This also handles
* sequenced packet sockets providing the socket receive queue
* is only ever holding data ready to receive.
*
* Note: when you _don't_ use this routine for this protocol,
* and you use a different write policy from sock_writeable()
* then please supply your own write_space callback.
*/
unsigned int datagram_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
unsigned int mask;
poll_wait(file, sk->sk_sleep, wait);
mask = 0;
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
mask |= POLLERR;
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP;
if (sk->sk_shutdown == SHUTDOWN_MASK)
mask |= POLLHUP;
/* readable? */
if (!skb_queue_empty(&sk->sk_receive_queue) ||
(sk->sk_shutdown & RCV_SHUTDOWN))
mask |= POLLIN | POLLRDNORM;
/* Connection-based need to check for termination and startup */
if (connection_based(sk)) {
if (sk->sk_state == TCP_CLOSE)
mask |= POLLHUP;
/* connection hasn't started yet? */
if (sk->sk_state == TCP_SYN_SENT)
return mask;
}
/* writable? */
if (sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
return mask;
}
EXPORT_SYMBOL(datagram_poll);
EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
EXPORT_SYMBOL(skb_copy_datagram_iovec);
EXPORT_SYMBOL(skb_free_datagram);
EXPORT_SYMBOL(skb_recv_datagram);

3573
net/core/dev.c Normal file

File diff suppressed because it is too large Load Diff

296
net/core/dev_mcast.c Normal file
View File

@@ -0,0 +1,296 @@
/*
* Linux NET3: Multicast List maintenance.
*
* Authors:
* Tim Kordas <tjk@nostromo.eeap.cwru.edu>
* Richard Underwood <richard@wuzz.demon.co.uk>
*
* Stir fried together from the IP multicast and CAP patches above
* Alan Cox <Alan.Cox@linux.org>
*
* Fixes:
* Alan Cox : Update the device on a real delete
* rather than any time but...
* Alan Cox : IFF_ALLMULTI support.
* Alan Cox : New format set_multicast_list() calls.
* Gleb Natapov : Remove dev_mc_lock.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
/*
* Device multicast list maintenance.
*
* This is used both by IP and by the user level maintenance functions.
* Unlike BSD we maintain a usage count on a given multicast address so
* that a casual user application can add/delete multicasts used by
* protocols without doing damage to the protocols when it deletes the
* entries. It also helps IP as it tracks overlapping maps.
*
* Device mc lists are changed by bh at least if IPv6 is enabled,
* so that it must be bh protected.
*
* We block accesses to device mc filters with netif_tx_lock.
*/
/*
* Update the multicast list into the physical NIC controller.
*/
static void __dev_mc_upload(struct net_device *dev)
{
/* Don't do anything till we up the interface
* [dev_open will call this function so the list will
* stay sane]
*/
if (!(dev->flags&IFF_UP))
return;
/*
* Devices with no set multicast or which have been
* detached don't get set.
*/
if (dev->set_multicast_list == NULL ||
!netif_device_present(dev))
return;
dev->set_multicast_list(dev);
}
void dev_mc_upload(struct net_device *dev)
{
netif_tx_lock_bh(dev);
__dev_mc_upload(dev);
netif_tx_unlock_bh(dev);
}
/*
* Delete a device level multicast
*/
int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
{
int err = 0;
struct dev_mc_list *dmi, **dmip;
netif_tx_lock_bh(dev);
for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
/*
* Find the entry we want to delete. The device could
* have variable length entries so check these too.
*/
if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
alen == dmi->dmi_addrlen) {
if (glbl) {
int old_glbl = dmi->dmi_gusers;
dmi->dmi_gusers = 0;
if (old_glbl == 0)
break;
}
if (--dmi->dmi_users)
goto done;
/*
* Last user. So delete the entry.
*/
*dmip = dmi->next;
dev->mc_count--;
kfree(dmi);
/*
* We have altered the list, so the card
* loaded filter is now wrong. Fix it
*/
__dev_mc_upload(dev);
netif_tx_unlock_bh(dev);
return 0;
}
}
err = -ENOENT;
done:
netif_tx_unlock_bh(dev);
return err;
}
/*
* Add a device level multicast
*/
int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
{
int err = 0;
struct dev_mc_list *dmi, *dmi1;
dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC);
netif_tx_lock_bh(dev);
for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
dmi->dmi_addrlen == alen) {
if (glbl) {
int old_glbl = dmi->dmi_gusers;
dmi->dmi_gusers = 1;
if (old_glbl)
goto done;
}
dmi->dmi_users++;
goto done;
}
}
if ((dmi = dmi1) == NULL) {
netif_tx_unlock_bh(dev);
return -ENOMEM;
}
memcpy(dmi->dmi_addr, addr, alen);
dmi->dmi_addrlen = alen;
dmi->next = dev->mc_list;
dmi->dmi_users = 1;
dmi->dmi_gusers = glbl ? 1 : 0;
dev->mc_list = dmi;
dev->mc_count++;
__dev_mc_upload(dev);
netif_tx_unlock_bh(dev);
return 0;
done:
netif_tx_unlock_bh(dev);
kfree(dmi1);
return err;
}
/*
* Discard multicast list when a device is downed
*/
void dev_mc_discard(struct net_device *dev)
{
netif_tx_lock_bh(dev);
while (dev->mc_list != NULL) {
struct dev_mc_list *tmp = dev->mc_list;
dev->mc_list = tmp->next;
if (tmp->dmi_users > tmp->dmi_gusers)
printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users);
kfree(tmp);
}
dev->mc_count = 0;
netif_tx_unlock_bh(dev);
}
#ifdef CONFIG_PROC_FS
static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
{
struct net_device *dev;
loff_t off = 0;
read_lock(&dev_base_lock);
for (dev = dev_base; dev; dev = dev->next) {
if (off++ == *pos)
return dev;
}
return NULL;
}
static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct net_device *dev = v;
++*pos;
return dev->next;
}
static void dev_mc_seq_stop(struct seq_file *seq, void *v)
{
read_unlock(&dev_base_lock);
}
static int dev_mc_seq_show(struct seq_file *seq, void *v)
{
struct dev_mc_list *m;
struct net_device *dev = v;
netif_tx_lock_bh(dev);
for (m = dev->mc_list; m; m = m->next) {
int i;
seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
dev->name, m->dmi_users, m->dmi_gusers);
for (i = 0; i < m->dmi_addrlen; i++)
seq_printf(seq, "%02x", m->dmi_addr[i]);
seq_putc(seq, '\n');
}
netif_tx_unlock_bh(dev);
return 0;
}
static struct seq_operations dev_mc_seq_ops = {
.start = dev_mc_seq_start,
.next = dev_mc_seq_next,
.stop = dev_mc_seq_stop,
.show = dev_mc_seq_show,
};
static int dev_mc_seq_open(struct inode *inode, struct file *file)
{
return seq_open(file, &dev_mc_seq_ops);
}
static const struct file_operations dev_mc_seq_fops = {
.owner = THIS_MODULE,
.open = dev_mc_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
#endif
void __init dev_mcast_init(void)
{
proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops);
}
EXPORT_SYMBOL(dev_mc_add);
EXPORT_SYMBOL(dev_mc_delete);
EXPORT_SYMBOL(dev_mc_upload);

288
net/core/dst.c Normal file
View File

@@ -0,0 +1,288 @@
/*
* net/core/dst.c Protocol independent destination cache.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/types.h>
#include <net/dst.h>
/* Locking strategy:
* 1) Garbage collection state of dead destination cache
* entries is protected by dst_lock.
* 2) GC is run only from BH context, and is the only remover
* of entries.
* 3) Entries are added to the garbage list from both BH
* and non-BH context, so local BH disabling is needed.
* 4) All operations modify state, so a spinlock is used.
*/
static struct dst_entry *dst_garbage_list;
#if RT_CACHE_DEBUG >= 2
static atomic_t dst_total = ATOMIC_INIT(0);
#endif
static DEFINE_SPINLOCK(dst_lock);
static unsigned long dst_gc_timer_expires;
static unsigned long dst_gc_timer_inc = DST_GC_MAX;
static void dst_run_gc(unsigned long);
static void ___dst_free(struct dst_entry * dst);
static DEFINE_TIMER(dst_gc_timer, dst_run_gc, DST_GC_MIN, 0);
static void dst_run_gc(unsigned long dummy)
{
int delayed = 0;
int work_performed;
struct dst_entry * dst, **dstp;
if (!spin_trylock(&dst_lock)) {
mod_timer(&dst_gc_timer, jiffies + HZ/10);
return;
}
del_timer(&dst_gc_timer);
dstp = &dst_garbage_list;
work_performed = 0;
while ((dst = *dstp) != NULL) {
if (atomic_read(&dst->__refcnt)) {
dstp = &dst->next;
delayed++;
continue;
}
*dstp = dst->next;
work_performed = 1;
dst = dst_destroy(dst);
if (dst) {
/* NOHASH and still referenced. Unless it is already
* on gc list, invalidate it and add to gc list.
*
* Note: this is temporary. Actually, NOHASH dst's
* must be obsoleted when parent is obsoleted.
* But we do not have state "obsoleted, but
* referenced by parent", so it is right.
*/
if (dst->obsolete > 1)
continue;
___dst_free(dst);
dst->next = *dstp;
*dstp = dst;
dstp = &dst->next;
}
}
if (!dst_garbage_list) {
dst_gc_timer_inc = DST_GC_MAX;
goto out;
}
if (!work_performed) {
if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
dst_gc_timer_expires = DST_GC_MAX;
dst_gc_timer_inc += DST_GC_INC;
} else {
dst_gc_timer_inc = DST_GC_INC;
dst_gc_timer_expires = DST_GC_MIN;
}
#if RT_CACHE_DEBUG >= 2
printk("dst_total: %d/%d %ld\n",
atomic_read(&dst_total), delayed, dst_gc_timer_expires);
#endif
/* if the next desired timer is more than 4 seconds in the future
* then round the timer to whole seconds
*/
if (dst_gc_timer_expires > 4*HZ)
mod_timer(&dst_gc_timer,
round_jiffies(jiffies + dst_gc_timer_expires));
else
mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
out:
spin_unlock(&dst_lock);
}
static int dst_discard_in(struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
static int dst_discard_out(struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
void * dst_alloc(struct dst_ops * ops)
{
struct dst_entry * dst;
if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
if (ops->gc())
return NULL;
}
dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC);
if (!dst)
return NULL;
atomic_set(&dst->__refcnt, 0);
dst->ops = ops;
dst->lastuse = jiffies;
dst->path = dst;
dst->input = dst_discard_in;
dst->output = dst_discard_out;
#if RT_CACHE_DEBUG >= 2
atomic_inc(&dst_total);
#endif
atomic_inc(&ops->entries);
return dst;
}
static void ___dst_free(struct dst_entry * dst)
{
/* The first case (dev==NULL) is required, when
protocol module is unloaded.
*/
if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
dst->input = dst_discard_in;
dst->output = dst_discard_out;
}
dst->obsolete = 2;
}
void __dst_free(struct dst_entry * dst)
{
spin_lock_bh(&dst_lock);
___dst_free(dst);
dst->next = dst_garbage_list;
dst_garbage_list = dst;
if (dst_gc_timer_inc > DST_GC_INC) {
dst_gc_timer_inc = DST_GC_INC;
dst_gc_timer_expires = DST_GC_MIN;
mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
}
spin_unlock_bh(&dst_lock);
}
struct dst_entry *dst_destroy(struct dst_entry * dst)
{
struct dst_entry *child;
struct neighbour *neigh;
struct hh_cache *hh;
smp_rmb();
again:
neigh = dst->neighbour;
hh = dst->hh;
child = dst->child;
dst->hh = NULL;
if (hh && atomic_dec_and_test(&hh->hh_refcnt))
kfree(hh);
if (neigh) {
dst->neighbour = NULL;
neigh_release(neigh);
}
atomic_dec(&dst->ops->entries);
if (dst->ops->destroy)
dst->ops->destroy(dst);
if (dst->dev)
dev_put(dst->dev);
#if RT_CACHE_DEBUG >= 2
atomic_dec(&dst_total);
#endif
kmem_cache_free(dst->ops->kmem_cachep, dst);
dst = child;
if (dst) {
int nohash = dst->flags & DST_NOHASH;
if (atomic_dec_and_test(&dst->__refcnt)) {
/* We were real parent of this dst, so kill child. */
if (nohash)
goto again;
} else {
/* Child is still referenced, return it for freeing. */
if (nohash)
return dst;
/* Child is still in his hash table */
}
}
return NULL;
}
/* Dirty hack. We did it in 2.2 (in __dst_free),
* we have _very_ good reasons not to repeat
* this mistake in 2.3, but we have no choice
* now. _It_ _is_ _explicit_ _deliberate_
* _race_ _condition_.
*
* Commented and originally written by Alexey.
*/
static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
int unregister)
{
if (dst->ops->ifdown)
dst->ops->ifdown(dst, dev, unregister);
if (dev != dst->dev)
return;
if (!unregister) {
dst->input = dst_discard_in;
dst->output = dst_discard_out;
} else {
dst->dev = &loopback_dev;
dev_hold(&loopback_dev);
dev_put(dev);
if (dst->neighbour && dst->neighbour->dev == dev) {
dst->neighbour->dev = &loopback_dev;
dev_put(dev);
dev_hold(&loopback_dev);
}
}
}
static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
struct dst_entry *dst;
switch (event) {
case NETDEV_UNREGISTER:
case NETDEV_DOWN:
spin_lock_bh(&dst_lock);
for (dst = dst_garbage_list; dst; dst = dst->next) {
dst_ifdown(dst, dev, event != NETDEV_DOWN);
}
spin_unlock_bh(&dst_lock);
break;
}
return NOTIFY_DONE;
}
static struct notifier_block dst_dev_notifier = {
.notifier_call = dst_dev_event,
};
void __init dst_init(void)
{
register_netdevice_notifier(&dst_dev_notifier);
}
EXPORT_SYMBOL(__dst_free);
EXPORT_SYMBOL(dst_alloc);
EXPORT_SYMBOL(dst_destroy);

984
net/core/ethtool.c Normal file
View File

@@ -0,0 +1,984 @@
/*
* net/core/ethtool.c - Ethtool ioctl handler
* Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
*
* This file is where we call all the ethtool_ops commands to get
* the information ethtool needs. We fall back to calling do_ioctl()
* for drivers which haven't been converted to ethtool_ops yet.
*
* It's GPL, stupid.
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <asm/uaccess.h>
/*
* Some useful ethtool_ops methods that're device independent.
* If we find that all drivers want to do the same thing here,
* we can turn these into dev_() function calls.
*/
u32 ethtool_op_get_link(struct net_device *dev)
{
return netif_carrier_ok(dev) ? 1 : 0;
}
u32 ethtool_op_get_tx_csum(struct net_device *dev)
{
return (dev->features & NETIF_F_ALL_CSUM) != 0;
}
int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
{
if (data)
dev->features |= NETIF_F_IP_CSUM;
else
dev->features &= ~NETIF_F_IP_CSUM;
return 0;
}
int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
{
if (data)
dev->features |= NETIF_F_HW_CSUM;
else
dev->features &= ~NETIF_F_HW_CSUM;
return 0;
}
u32 ethtool_op_get_sg(struct net_device *dev)
{
return (dev->features & NETIF_F_SG) != 0;
}
int ethtool_op_set_sg(struct net_device *dev, u32 data)
{
if (data)
dev->features |= NETIF_F_SG;
else
dev->features &= ~NETIF_F_SG;
return 0;
}
u32 ethtool_op_get_tso(struct net_device *dev)
{
return (dev->features & NETIF_F_TSO) != 0;
}
int ethtool_op_set_tso(struct net_device *dev, u32 data)
{
if (data)
dev->features |= NETIF_F_TSO;
else
dev->features &= ~NETIF_F_TSO;
return 0;
}
int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data)
{
unsigned char len = dev->addr_len;
if ( addr->size < len )
return -ETOOSMALL;
addr->size = len;
memcpy(data, dev->perm_addr, len);
return 0;
}
u32 ethtool_op_get_ufo(struct net_device *dev)
{
return (dev->features & NETIF_F_UFO) != 0;
}
int ethtool_op_set_ufo(struct net_device *dev, u32 data)
{
if (data)
dev->features |= NETIF_F_UFO;
else
dev->features &= ~NETIF_F_UFO;
return 0;
}
/* Handlers for each ethtool command */
static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
{
struct ethtool_cmd cmd = { ETHTOOL_GSET };
int err;
if (!dev->ethtool_ops->get_settings)
return -EOPNOTSUPP;
err = dev->ethtool_ops->get_settings(dev, &cmd);
if (err < 0)
return err;
if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
return -EFAULT;
return 0;
}
static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
{
struct ethtool_cmd cmd;
if (!dev->ethtool_ops->set_settings)
return -EOPNOTSUPP;
if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
return -EFAULT;
return dev->ethtool_ops->set_settings(dev, &cmd);
}
static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
{
struct ethtool_drvinfo info;
const struct ethtool_ops *ops = dev->ethtool_ops;
if (!ops->get_drvinfo)
return -EOPNOTSUPP;
memset(&info, 0, sizeof(info));
info.cmd = ETHTOOL_GDRVINFO;
ops->get_drvinfo(dev, &info);
if (ops->self_test_count)
info.testinfo_len = ops->self_test_count(dev);
if (ops->get_stats_count)
info.n_stats = ops->get_stats_count(dev);
if (ops->get_regs_len)
info.regdump_len = ops->get_regs_len(dev);
if (ops->get_eeprom_len)
info.eedump_len = ops->get_eeprom_len(dev);
if (copy_to_user(useraddr, &info, sizeof(info)))
return -EFAULT;
return 0;
}
static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
{
struct ethtool_regs regs;
const struct ethtool_ops *ops = dev->ethtool_ops;
void *regbuf;
int reglen, ret;
if (!ops->get_regs || !ops->get_regs_len)
return -EOPNOTSUPP;
if (copy_from_user(&regs, useraddr, sizeof(regs)))
return -EFAULT;
reglen = ops->get_regs_len(dev);
if (regs.len > reglen)
regs.len = reglen;
regbuf = kmalloc(reglen, GFP_USER);
if (!regbuf)
return -ENOMEM;
ops->get_regs(dev, &regs, regbuf);
ret = -EFAULT;
if (copy_to_user(useraddr, &regs, sizeof(regs)))
goto out;
useraddr += offsetof(struct ethtool_regs, data);
if (copy_to_user(useraddr, regbuf, regs.len))
goto out;
ret = 0;
out:
kfree(regbuf);
return ret;
}
static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
{
struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
if (!dev->ethtool_ops->get_wol)
return -EOPNOTSUPP;
dev->ethtool_ops->get_wol(dev, &wol);
if (copy_to_user(useraddr, &wol, sizeof(wol)))
return -EFAULT;
return 0;
}
static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
{
struct ethtool_wolinfo wol;
if (!dev->ethtool_ops->set_wol)
return -EOPNOTSUPP;
if (copy_from_user(&wol, useraddr, sizeof(wol)))
return -EFAULT;
return dev->ethtool_ops->set_wol(dev, &wol);
}
static int ethtool_get_msglevel(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GMSGLVL };
if (!dev->ethtool_ops->get_msglevel)
return -EOPNOTSUPP;
edata.data = dev->ethtool_ops->get_msglevel(dev);
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
static int ethtool_set_msglevel(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
if (!dev->ethtool_ops->set_msglevel)
return -EOPNOTSUPP;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
dev->ethtool_ops->set_msglevel(dev, edata.data);
return 0;
}
static int ethtool_nway_reset(struct net_device *dev)
{
if (!dev->ethtool_ops->nway_reset)
return -EOPNOTSUPP;
return dev->ethtool_ops->nway_reset(dev);
}
static int ethtool_get_link(struct net_device *dev, void __user *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GLINK };
if (!dev->ethtool_ops->get_link)
return -EOPNOTSUPP;
edata.data = dev->ethtool_ops->get_link(dev);
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
{
struct ethtool_eeprom eeprom;
const struct ethtool_ops *ops = dev->ethtool_ops;
u8 *data;
int ret;
if (!ops->get_eeprom || !ops->get_eeprom_len)
return -EOPNOTSUPP;
if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
return -EFAULT;
/* Check for wrap and zero */
if (eeprom.offset + eeprom.len <= eeprom.offset)
return -EINVAL;
/* Check for exceeding total eeprom len */
if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
return -EINVAL;
data = kmalloc(eeprom.len, GFP_USER);
if (!data)
return -ENOMEM;
ret = -EFAULT;
if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
goto out;
ret = ops->get_eeprom(dev, &eeprom, data);
if (ret)
goto out;
ret = -EFAULT;
if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
goto out;
if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
goto out;
ret = 0;
out:
kfree(data);
return ret;
}
static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
{
struct ethtool_eeprom eeprom;
const struct ethtool_ops *ops = dev->ethtool_ops;
u8 *data;
int ret;
if (!ops->set_eeprom || !ops->get_eeprom_len)
return -EOPNOTSUPP;
if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
return -EFAULT;
/* Check for wrap and zero */
if (eeprom.offset + eeprom.len <= eeprom.offset)
return -EINVAL;
/* Check for exceeding total eeprom len */
if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
return -EINVAL;
data = kmalloc(eeprom.len, GFP_USER);
if (!data)
return -ENOMEM;
ret = -EFAULT;
if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
goto out;
ret = ops->set_eeprom(dev, &eeprom, data);
if (ret)
goto out;
if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
ret = -EFAULT;
out:
kfree(data);
return ret;
}
static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
{
struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE };
if (!dev->ethtool_ops->get_coalesce)
return -EOPNOTSUPP;
dev->ethtool_ops->get_coalesce(dev, &coalesce);
if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
return -EFAULT;
return 0;
}
static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
{
struct ethtool_coalesce coalesce;
if (!dev->ethtool_ops->set_coalesce)
return -EOPNOTSUPP;
if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
return -EFAULT;
return dev->ethtool_ops->set_coalesce(dev, &coalesce);
}
static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr)
{
struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM };
if (!dev->ethtool_ops->get_ringparam)
return -EOPNOTSUPP;
dev->ethtool_ops->get_ringparam(dev, &ringparam);
if (copy_to_user(useraddr, &ringparam, sizeof(ringparam)))
return -EFAULT;
return 0;
}
static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr)
{
struct ethtool_ringparam ringparam;
if (!dev->ethtool_ops->set_ringparam)
return -EOPNOTSUPP;
if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
return -EFAULT;
return dev->ethtool_ops->set_ringparam(dev, &ringparam);
}
static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr)
{
struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
if (!dev->ethtool_ops->get_pauseparam)
return -EOPNOTSUPP;
dev->ethtool_ops->get_pauseparam(dev, &pauseparam);
if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam)))
return -EFAULT;
return 0;
}
static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
{
struct ethtool_pauseparam pauseparam;
if (!dev->ethtool_ops->set_pauseparam)
return -EOPNOTSUPP;
if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
return -EFAULT;
return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
}
static int ethtool_get_rx_csum(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GRXCSUM };
if (!dev->ethtool_ops->get_rx_csum)
return -EOPNOTSUPP;
edata.data = dev->ethtool_ops->get_rx_csum(dev);
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
if (!dev->ethtool_ops->set_rx_csum)
return -EOPNOTSUPP;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
dev->ethtool_ops->set_rx_csum(dev, edata.data);
return 0;
}
static int ethtool_get_tx_csum(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GTXCSUM };
if (!dev->ethtool_ops->get_tx_csum)
return -EOPNOTSUPP;
edata.data = dev->ethtool_ops->get_tx_csum(dev);
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
static int __ethtool_set_sg(struct net_device *dev, u32 data)
{
int err;
if (!data && dev->ethtool_ops->set_tso) {
err = dev->ethtool_ops->set_tso(dev, 0);
if (err)
return err;
}
if (!data && dev->ethtool_ops->set_ufo) {
err = dev->ethtool_ops->set_ufo(dev, 0);
if (err)
return err;
}
return dev->ethtool_ops->set_sg(dev, data);
}
static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
int err;
if (!dev->ethtool_ops->set_tx_csum)
return -EOPNOTSUPP;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
if (!edata.data && dev->ethtool_ops->set_sg) {
err = __ethtool_set_sg(dev, 0);
if (err)
return err;
}
return dev->ethtool_ops->set_tx_csum(dev, edata.data);
}
static int ethtool_get_sg(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GSG };
if (!dev->ethtool_ops->get_sg)
return -EOPNOTSUPP;
edata.data = dev->ethtool_ops->get_sg(dev);
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
if (!dev->ethtool_ops->set_sg)
return -EOPNOTSUPP;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
if (edata.data &&
!(dev->features & NETIF_F_ALL_CSUM))
return -EINVAL;
return __ethtool_set_sg(dev, edata.data);
}
static int ethtool_get_tso(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GTSO };
if (!dev->ethtool_ops->get_tso)
return -EOPNOTSUPP;
edata.data = dev->ethtool_ops->get_tso(dev);
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
if (!dev->ethtool_ops->set_tso)
return -EOPNOTSUPP;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
if (edata.data && !(dev->features & NETIF_F_SG))
return -EINVAL;
return dev->ethtool_ops->set_tso(dev, edata.data);
}
static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GUFO };
if (!dev->ethtool_ops->get_ufo)
return -EOPNOTSUPP;
edata.data = dev->ethtool_ops->get_ufo(dev);
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
if (!dev->ethtool_ops->set_ufo)
return -EOPNOTSUPP;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
if (edata.data && !(dev->features & NETIF_F_SG))
return -EINVAL;
if (edata.data && !(dev->features & NETIF_F_HW_CSUM))
return -EINVAL;
return dev->ethtool_ops->set_ufo(dev, edata.data);
}
static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata = { ETHTOOL_GGSO };
edata.data = dev->features & NETIF_F_GSO;
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
}
static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
if (copy_from_user(&edata, useraddr, sizeof(edata)))
return -EFAULT;
if (edata.data)
dev->features |= NETIF_F_GSO;
else
dev->features &= ~NETIF_F_GSO;
return 0;
}
static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
{
struct ethtool_test test;
const struct ethtool_ops *ops = dev->ethtool_ops;
u64 *data;
int ret;
if (!ops->self_test || !ops->self_test_count)
return -EOPNOTSUPP;
if (copy_from_user(&test, useraddr, sizeof(test)))
return -EFAULT;
test.len = ops->self_test_count(dev);
data = kmalloc(test.len * sizeof(u64), GFP_USER);
if (!data)
return -ENOMEM;
ops->self_test(dev, &test, data);
ret = -EFAULT;
if (copy_to_user(useraddr, &test, sizeof(test)))
goto out;
useraddr += sizeof(test);
if (copy_to_user(useraddr, data, test.len * sizeof(u64)))
goto out;
ret = 0;
out:
kfree(data);
return ret;
}
static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
{
struct ethtool_gstrings gstrings;
const struct ethtool_ops *ops = dev->ethtool_ops;
u8 *data;
int ret;
if (!ops->get_strings)
return -EOPNOTSUPP;
if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
return -EFAULT;
switch (gstrings.string_set) {
case ETH_SS_TEST:
if (!ops->self_test_count)
return -EOPNOTSUPP;
gstrings.len = ops->self_test_count(dev);
break;
case ETH_SS_STATS:
if (!ops->get_stats_count)
return -EOPNOTSUPP;
gstrings.len = ops->get_stats_count(dev);
break;
default:
return -EINVAL;
}
data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
if (!data)
return -ENOMEM;
ops->get_strings(dev, gstrings.string_set, data);
ret = -EFAULT;
if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
goto out;
useraddr += sizeof(gstrings);
if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
goto out;
ret = 0;
out:
kfree(data);
return ret;
}
static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
{
struct ethtool_value id;
if (!dev->ethtool_ops->phys_id)
return -EOPNOTSUPP;
if (copy_from_user(&id, useraddr, sizeof(id)))
return -EFAULT;
return dev->ethtool_ops->phys_id(dev, id.data);
}
static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
{
struct ethtool_stats stats;
const struct ethtool_ops *ops = dev->ethtool_ops;
u64 *data;
int ret;
if (!ops->get_ethtool_stats || !ops->get_stats_count)
return -EOPNOTSUPP;
if (copy_from_user(&stats, useraddr, sizeof(stats)))
return -EFAULT;
stats.n_stats = ops->get_stats_count(dev);
data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER);
if (!data)
return -ENOMEM;
ops->get_ethtool_stats(dev, &stats, data);
ret = -EFAULT;
if (copy_to_user(useraddr, &stats, sizeof(stats)))
goto out;
useraddr += sizeof(stats);
if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
goto out;
ret = 0;
out:
kfree(data);
return ret;
}
static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
{
struct ethtool_perm_addr epaddr;
u8 *data;
int ret;
if (!dev->ethtool_ops->get_perm_addr)
return -EOPNOTSUPP;
if (copy_from_user(&epaddr,useraddr,sizeof(epaddr)))
return -EFAULT;
data = kmalloc(epaddr.size, GFP_USER);
if (!data)
return -ENOMEM;
ret = dev->ethtool_ops->get_perm_addr(dev,&epaddr,data);
if (ret)
return ret;
ret = -EFAULT;
if (copy_to_user(useraddr, &epaddr, sizeof(epaddr)))
goto out;
useraddr += sizeof(epaddr);
if (copy_to_user(useraddr, data, epaddr.size))
goto out;
ret = 0;
out:
kfree(data);
return ret;
}
/* The main entry point in this file. Called from net/core/dev.c */
int dev_ethtool(struct ifreq *ifr)
{
struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
void __user *useraddr = ifr->ifr_data;
u32 ethcmd;
int rc;
unsigned long old_features;
if (!dev || !netif_device_present(dev))
return -ENODEV;
if (!dev->ethtool_ops)
goto ioctl;
if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
return -EFAULT;
/* Allow some commands to be done by anyone */
switch(ethcmd) {
case ETHTOOL_GDRVINFO:
case ETHTOOL_GMSGLVL:
case ETHTOOL_GCOALESCE:
case ETHTOOL_GRINGPARAM:
case ETHTOOL_GPAUSEPARAM:
case ETHTOOL_GRXCSUM:
case ETHTOOL_GTXCSUM:
case ETHTOOL_GSG:
case ETHTOOL_GSTRINGS:
case ETHTOOL_GTSO:
case ETHTOOL_GPERMADDR:
case ETHTOOL_GUFO:
case ETHTOOL_GGSO:
break;
default:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
}
if(dev->ethtool_ops->begin)
if ((rc = dev->ethtool_ops->begin(dev)) < 0)
return rc;
old_features = dev->features;
switch (ethcmd) {
case ETHTOOL_GSET:
rc = ethtool_get_settings(dev, useraddr);
break;
case ETHTOOL_SSET:
rc = ethtool_set_settings(dev, useraddr);
break;
case ETHTOOL_GDRVINFO:
rc = ethtool_get_drvinfo(dev, useraddr);
break;
case ETHTOOL_GREGS:
rc = ethtool_get_regs(dev, useraddr);
break;
case ETHTOOL_GWOL:
rc = ethtool_get_wol(dev, useraddr);
break;
case ETHTOOL_SWOL:
rc = ethtool_set_wol(dev, useraddr);
break;
case ETHTOOL_GMSGLVL:
rc = ethtool_get_msglevel(dev, useraddr);
break;
case ETHTOOL_SMSGLVL:
rc = ethtool_set_msglevel(dev, useraddr);
break;
case ETHTOOL_NWAY_RST:
rc = ethtool_nway_reset(dev);
break;
case ETHTOOL_GLINK:
rc = ethtool_get_link(dev, useraddr);
break;
case ETHTOOL_GEEPROM:
rc = ethtool_get_eeprom(dev, useraddr);
break;
case ETHTOOL_SEEPROM:
rc = ethtool_set_eeprom(dev, useraddr);
break;
case ETHTOOL_GCOALESCE:
rc = ethtool_get_coalesce(dev, useraddr);
break;
case ETHTOOL_SCOALESCE:
rc = ethtool_set_coalesce(dev, useraddr);
break;
case ETHTOOL_GRINGPARAM:
rc = ethtool_get_ringparam(dev, useraddr);
break;
case ETHTOOL_SRINGPARAM:
rc = ethtool_set_ringparam(dev, useraddr);
break;
case ETHTOOL_GPAUSEPARAM:
rc = ethtool_get_pauseparam(dev, useraddr);
break;
case ETHTOOL_SPAUSEPARAM:
rc = ethtool_set_pauseparam(dev, useraddr);
break;
case ETHTOOL_GRXCSUM:
rc = ethtool_get_rx_csum(dev, useraddr);
break;
case ETHTOOL_SRXCSUM:
rc = ethtool_set_rx_csum(dev, useraddr);
break;
case ETHTOOL_GTXCSUM:
rc = ethtool_get_tx_csum(dev, useraddr);
break;
case ETHTOOL_STXCSUM:
rc = ethtool_set_tx_csum(dev, useraddr);
break;
case ETHTOOL_GSG:
rc = ethtool_get_sg(dev, useraddr);
break;
case ETHTOOL_SSG:
rc = ethtool_set_sg(dev, useraddr);
break;
case ETHTOOL_GTSO:
rc = ethtool_get_tso(dev, useraddr);
break;
case ETHTOOL_STSO:
rc = ethtool_set_tso(dev, useraddr);
break;
case ETHTOOL_TEST:
rc = ethtool_self_test(dev, useraddr);
break;
case ETHTOOL_GSTRINGS:
rc = ethtool_get_strings(dev, useraddr);
break;
case ETHTOOL_PHYS_ID:
rc = ethtool_phys_id(dev, useraddr);
break;
case ETHTOOL_GSTATS:
rc = ethtool_get_stats(dev, useraddr);
break;
case ETHTOOL_GPERMADDR:
rc = ethtool_get_perm_addr(dev, useraddr);
break;
case ETHTOOL_GUFO:
rc = ethtool_get_ufo(dev, useraddr);
break;
case ETHTOOL_SUFO:
rc = ethtool_set_ufo(dev, useraddr);
break;
case ETHTOOL_GGSO:
rc = ethtool_get_gso(dev, useraddr);
break;
case ETHTOOL_SGSO:
rc = ethtool_set_gso(dev, useraddr);
break;
default:
rc = -EOPNOTSUPP;
}
if(dev->ethtool_ops->complete)
dev->ethtool_ops->complete(dev);
if (old_features != dev->features)
netdev_features_change(dev);
return rc;
ioctl:
/* Keep existing behaviour for the moment. */
if (!capable(CAP_NET_ADMIN))
return -EPERM;
if (dev->do_ioctl)
return dev->do_ioctl(dev, ifr, SIOCETHTOOL);
return -EOPNOTSUPP;
}
EXPORT_SYMBOL(dev_ethtool);
EXPORT_SYMBOL(ethtool_op_get_link);
EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr);
EXPORT_SYMBOL(ethtool_op_get_sg);
EXPORT_SYMBOL(ethtool_op_get_tso);
EXPORT_SYMBOL(ethtool_op_get_tx_csum);
EXPORT_SYMBOL(ethtool_op_set_sg);
EXPORT_SYMBOL(ethtool_op_set_tso);
EXPORT_SYMBOL(ethtool_op_set_tx_csum);
EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
EXPORT_SYMBOL(ethtool_op_set_ufo);
EXPORT_SYMBOL(ethtool_op_get_ufo);

507
net/core/fib_rules.c Normal file
View File

@@ -0,0 +1,507 @@
/*
* net/core/fib_rules.c Generic Routing Rules
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, version 2.
*
* Authors: Thomas Graf <tgraf@suug.ch>
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <net/fib_rules.h>
static LIST_HEAD(rules_ops);
static DEFINE_SPINLOCK(rules_mod_lock);
static void notify_rule_change(int event, struct fib_rule *rule,
struct fib_rules_ops *ops, struct nlmsghdr *nlh,
u32 pid);
static struct fib_rules_ops *lookup_rules_ops(int family)
{
struct fib_rules_ops *ops;
rcu_read_lock();
list_for_each_entry_rcu(ops, &rules_ops, list) {
if (ops->family == family) {
if (!try_module_get(ops->owner))
ops = NULL;
rcu_read_unlock();
return ops;
}
}
rcu_read_unlock();
return NULL;
}
static void rules_ops_put(struct fib_rules_ops *ops)
{
if (ops)
module_put(ops->owner);
}
int fib_rules_register(struct fib_rules_ops *ops)
{
int err = -EEXIST;
struct fib_rules_ops *o;
if (ops->rule_size < sizeof(struct fib_rule))
return -EINVAL;
if (ops->match == NULL || ops->configure == NULL ||
ops->compare == NULL || ops->fill == NULL ||
ops->action == NULL)
return -EINVAL;
spin_lock(&rules_mod_lock);
list_for_each_entry(o, &rules_ops, list)
if (ops->family == o->family)
goto errout;
list_add_tail_rcu(&ops->list, &rules_ops);
err = 0;
errout:
spin_unlock(&rules_mod_lock);
return err;
}
EXPORT_SYMBOL_GPL(fib_rules_register);
static void cleanup_ops(struct fib_rules_ops *ops)
{
struct fib_rule *rule, *tmp;
list_for_each_entry_safe(rule, tmp, ops->rules_list, list) {
list_del_rcu(&rule->list);
fib_rule_put(rule);
}
}
int fib_rules_unregister(struct fib_rules_ops *ops)
{
int err = 0;
struct fib_rules_ops *o;
spin_lock(&rules_mod_lock);
list_for_each_entry(o, &rules_ops, list) {
if (o == ops) {
list_del_rcu(&o->list);
cleanup_ops(ops);
goto out;
}
}
err = -ENOENT;
out:
spin_unlock(&rules_mod_lock);
synchronize_rcu();
return err;
}
EXPORT_SYMBOL_GPL(fib_rules_unregister);
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
struct flowi *fl, int flags)
{
int ret = 0;
if (rule->ifindex && (rule->ifindex != fl->iif))
goto out;
if ((rule->mark ^ fl->mark) & rule->mark_mask)
goto out;
ret = ops->match(rule, fl, flags);
out:
return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
}
int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
int flags, struct fib_lookup_arg *arg)
{
struct fib_rule *rule;
int err;
rcu_read_lock();
list_for_each_entry_rcu(rule, ops->rules_list, list) {
if (!fib_rule_match(rule, ops, fl, flags))
continue;
err = ops->action(rule, fl, flags, arg);
if (err != -EAGAIN) {
fib_rule_get(rule);
arg->rule = rule;
goto out;
}
}
err = -ESRCH;
out:
rcu_read_unlock();
return err;
}
EXPORT_SYMBOL_GPL(fib_rules_lookup);
static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
struct fib_rules_ops *ops)
{
int err = -EINVAL;
if (frh->src_len)
if (tb[FRA_SRC] == NULL ||
frh->src_len > (ops->addr_size * 8) ||
nla_len(tb[FRA_SRC]) != ops->addr_size)
goto errout;
if (frh->dst_len)
if (tb[FRA_DST] == NULL ||
frh->dst_len > (ops->addr_size * 8) ||
nla_len(tb[FRA_DST]) != ops->addr_size)
goto errout;
err = 0;
errout:
return err;
}
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *r, *last = NULL;
struct nlattr *tb[FRA_MAX+1];
int err = -EINVAL;
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
goto errout;
ops = lookup_rules_ops(frh->family);
if (ops == NULL) {
err = EAFNOSUPPORT;
goto errout;
}
err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
if (err < 0)
goto errout;
err = validate_rulemsg(frh, tb, ops);
if (err < 0)
goto errout;
rule = kzalloc(ops->rule_size, GFP_KERNEL);
if (rule == NULL) {
err = -ENOMEM;
goto errout;
}
if (tb[FRA_PRIORITY])
rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
if (tb[FRA_IFNAME]) {
struct net_device *dev;
rule->ifindex = -1;
nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
dev = __dev_get_by_name(rule->ifname);
if (dev)
rule->ifindex = dev->ifindex;
}
if (tb[FRA_FWMARK]) {
rule->mark = nla_get_u32(tb[FRA_FWMARK]);
if (rule->mark)
/* compatibility: if the mark value is non-zero all bits
* are compared unless a mask is explicitly specified.
*/
rule->mark_mask = 0xFFFFFFFF;
}
if (tb[FRA_FWMASK])
rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);
rule->action = frh->action;
rule->flags = frh->flags;
rule->table = frh_get_table(frh, tb);
if (!rule->pref && ops->default_pref)
rule->pref = ops->default_pref();
err = ops->configure(rule, skb, nlh, frh, tb);
if (err < 0)
goto errout_free;
list_for_each_entry(r, ops->rules_list, list) {
if (r->pref > rule->pref)
break;
last = r;
}
fib_rule_get(rule);
if (last)
list_add_rcu(&rule->list, &last->list);
else
list_add_rcu(&rule->list, ops->rules_list);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
rules_ops_put(ops);
return 0;
errout_free:
kfree(rule);
errout:
rules_ops_put(ops);
return err;
}
int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule;
struct nlattr *tb[FRA_MAX+1];
int err = -EINVAL;
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
goto errout;
ops = lookup_rules_ops(frh->family);
if (ops == NULL) {
err = EAFNOSUPPORT;
goto errout;
}
err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
if (err < 0)
goto errout;
err = validate_rulemsg(frh, tb, ops);
if (err < 0)
goto errout;
list_for_each_entry(rule, ops->rules_list, list) {
if (frh->action && (frh->action != rule->action))
continue;
if (frh->table && (frh_get_table(frh, tb) != rule->table))
continue;
if (tb[FRA_PRIORITY] &&
(rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
continue;
if (tb[FRA_IFNAME] &&
nla_strcmp(tb[FRA_IFNAME], rule->ifname))
continue;
if (tb[FRA_FWMARK] &&
(rule->mark != nla_get_u32(tb[FRA_FWMARK])))
continue;
if (tb[FRA_FWMASK] &&
(rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
continue;
if (!ops->compare(rule, frh, tb))
continue;
if (rule->flags & FIB_RULE_PERMANENT) {
err = -EPERM;
goto errout;
}
list_del_rcu(&rule->list);
synchronize_rcu();
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
NETLINK_CB(skb).pid);
fib_rule_put(rule);
rules_ops_put(ops);
return 0;
}
err = -ENOENT;
errout:
rules_ops_put(ops);
return err;
}
static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
struct fib_rule *rule)
{
size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
+ nla_total_size(IFNAMSIZ) /* FRA_IFNAME */
+ nla_total_size(4) /* FRA_PRIORITY */
+ nla_total_size(4) /* FRA_TABLE */
+ nla_total_size(4) /* FRA_FWMARK */
+ nla_total_size(4); /* FRA_FWMASK */
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
return payload;
}
static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
u32 pid, u32 seq, int type, int flags,
struct fib_rules_ops *ops)
{
struct nlmsghdr *nlh;
struct fib_rule_hdr *frh;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
if (nlh == NULL)
return -EMSGSIZE;
frh = nlmsg_data(nlh);
frh->table = rule->table;
NLA_PUT_U32(skb, FRA_TABLE, rule->table);
frh->res1 = 0;
frh->res2 = 0;
frh->action = rule->action;
frh->flags = rule->flags;
if (rule->ifname[0])
NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
if (rule->pref)
NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
if (rule->mark)
NLA_PUT_U32(skb, FRA_FWMARK, rule->mark);
if (rule->mark_mask || rule->mark)
NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask);
if (ops->fill(rule, skb, nlh, frh) < 0)
goto nla_put_failure;
return nlmsg_end(skb, nlh);
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
{
int idx = 0;
struct fib_rule *rule;
struct fib_rules_ops *ops;
ops = lookup_rules_ops(family);
if (ops == NULL)
return -EAFNOSUPPORT;
rcu_read_lock();
list_for_each_entry_rcu(rule, ops->rules_list, list) {
if (idx < cb->args[0])
goto skip;
if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, RTM_NEWRULE,
NLM_F_MULTI, ops) < 0)
break;
skip:
idx++;
}
rcu_read_unlock();
cb->args[0] = idx;
rules_ops_put(ops);
return skb->len;
}
EXPORT_SYMBOL_GPL(fib_rules_dump);
static void notify_rule_change(int event, struct fib_rule *rule,
struct fib_rules_ops *ops, struct nlmsghdr *nlh,
u32 pid)
{
struct sk_buff *skb;
int err = -ENOBUFS;
skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
if (skb == NULL)
goto errout;
err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
goto errout;
}
err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
errout:
if (err < 0)
rtnl_set_sk_err(ops->nlgroup, err);
}
static void attach_rules(struct list_head *rules, struct net_device *dev)
{
struct fib_rule *rule;
list_for_each_entry(rule, rules, list) {
if (rule->ifindex == -1 &&
strcmp(dev->name, rule->ifname) == 0)
rule->ifindex = dev->ifindex;
}
}
static void detach_rules(struct list_head *rules, struct net_device *dev)
{
struct fib_rule *rule;
list_for_each_entry(rule, rules, list)
if (rule->ifindex == dev->ifindex)
rule->ifindex = -1;
}
static int fib_rules_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = ptr;
struct fib_rules_ops *ops;
ASSERT_RTNL();
rcu_read_lock();
switch (event) {
case NETDEV_REGISTER:
list_for_each_entry(ops, &rules_ops, list)
attach_rules(ops->rules_list, dev);
break;
case NETDEV_UNREGISTER:
list_for_each_entry(ops, &rules_ops, list)
detach_rules(ops->rules_list, dev);
break;
}
rcu_read_unlock();
return NOTIFY_DONE;
}
static struct notifier_block fib_rules_notifier = {
.notifier_call = fib_rules_event,
};
static int __init fib_rules_init(void)
{
return register_netdevice_notifier(&fib_rules_notifier);
}
subsys_initcall(fib_rules_init);

437
net/core/filter.c Normal file
View File

@@ -0,0 +1,437 @@
/*
* Linux Socket Filter - Kernel level socket filtering
*
* Author:
* Jay Schulist <jschlst@samba.org>
*
* Based on the design of:
* - The Berkeley Packet Filter
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Andi Kleen - Fix a few bad bugs and races.
* Kris Katterjohn - Added many additional checks in sk_chk_filter()
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
/* No hurry in this branch */
static void *__load_pointer(struct sk_buff *skb, int k)
{
u8 *ptr = NULL;
if (k >= SKF_NET_OFF)
ptr = skb->nh.raw + k - SKF_NET_OFF;
else if (k >= SKF_LL_OFF)
ptr = skb->mac.raw + k - SKF_LL_OFF;
if (ptr >= skb->head && ptr < skb->tail)
return ptr;
return NULL;
}
static inline void *load_pointer(struct sk_buff *skb, int k,
unsigned int size, void *buffer)
{
if (k >= 0)
return skb_header_pointer(skb, k, size, buffer);
else {
if (k >= SKF_AD_OFF)
return NULL;
return __load_pointer(skb, k);
}
}
/**
* sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on
* @filter: filter to apply
* @flen: length of filter
*
* Decode and apply filter instructions to the skb->data.
* Return length to keep, 0 for none. skb is the data we are
* filtering, filter is the array of filter instructions, and
* len is the number of filter blocks in the array.
*/
unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
{
struct sock_filter *fentry; /* We walk down these */
void *ptr;
u32 A = 0; /* Accumulator */
u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
u32 tmp;
int k;
int pc;
/*
* Process array of filter instructions.
*/
for (pc = 0; pc < flen; pc++) {
fentry = &filter[pc];
switch (fentry->code) {
case BPF_ALU|BPF_ADD|BPF_X:
A += X;
continue;
case BPF_ALU|BPF_ADD|BPF_K:
A += fentry->k;
continue;
case BPF_ALU|BPF_SUB|BPF_X:
A -= X;
continue;
case BPF_ALU|BPF_SUB|BPF_K:
A -= fentry->k;
continue;
case BPF_ALU|BPF_MUL|BPF_X:
A *= X;
continue;
case BPF_ALU|BPF_MUL|BPF_K:
A *= fentry->k;
continue;
case BPF_ALU|BPF_DIV|BPF_X:
if (X == 0)
return 0;
A /= X;
continue;
case BPF_ALU|BPF_DIV|BPF_K:
A /= fentry->k;
continue;
case BPF_ALU|BPF_AND|BPF_X:
A &= X;
continue;
case BPF_ALU|BPF_AND|BPF_K:
A &= fentry->k;
continue;
case BPF_ALU|BPF_OR|BPF_X:
A |= X;
continue;
case BPF_ALU|BPF_OR|BPF_K:
A |= fentry->k;
continue;
case BPF_ALU|BPF_LSH|BPF_X:
A <<= X;
continue;
case BPF_ALU|BPF_LSH|BPF_K:
A <<= fentry->k;
continue;
case BPF_ALU|BPF_RSH|BPF_X:
A >>= X;
continue;
case BPF_ALU|BPF_RSH|BPF_K:
A >>= fentry->k;
continue;
case BPF_ALU|BPF_NEG:
A = -A;
continue;
case BPF_JMP|BPF_JA:
pc += fentry->k;
continue;
case BPF_JMP|BPF_JGT|BPF_K:
pc += (A > fentry->k) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JGE|BPF_K:
pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JEQ|BPF_K:
pc += (A == fentry->k) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JSET|BPF_K:
pc += (A & fentry->k) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JGT|BPF_X:
pc += (A > X) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JGE|BPF_X:
pc += (A >= X) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JEQ|BPF_X:
pc += (A == X) ? fentry->jt : fentry->jf;
continue;
case BPF_JMP|BPF_JSET|BPF_X:
pc += (A & X) ? fentry->jt : fentry->jf;
continue;
case BPF_LD|BPF_W|BPF_ABS:
k = fentry->k;
load_w:
ptr = load_pointer(skb, k, 4, &tmp);
if (ptr != NULL) {
A = ntohl(get_unaligned((__be32 *)ptr));
continue;
}
break;
case BPF_LD|BPF_H|BPF_ABS:
k = fentry->k;
load_h:
ptr = load_pointer(skb, k, 2, &tmp);
if (ptr != NULL) {
A = ntohs(get_unaligned((__be16 *)ptr));
continue;
}
break;
case BPF_LD|BPF_B|BPF_ABS:
k = fentry->k;
load_b:
ptr = load_pointer(skb, k, 1, &tmp);
if (ptr != NULL) {
A = *(u8 *)ptr;
continue;
}
break;
case BPF_LD|BPF_W|BPF_LEN:
A = skb->len;
continue;
case BPF_LDX|BPF_W|BPF_LEN:
X = skb->len;
continue;
case BPF_LD|BPF_W|BPF_IND:
k = X + fentry->k;
goto load_w;
case BPF_LD|BPF_H|BPF_IND:
k = X + fentry->k;
goto load_h;
case BPF_LD|BPF_B|BPF_IND:
k = X + fentry->k;
goto load_b;
case BPF_LDX|BPF_B|BPF_MSH:
ptr = load_pointer(skb, fentry->k, 1, &tmp);
if (ptr != NULL) {
X = (*(u8 *)ptr & 0xf) << 2;
continue;
}
return 0;
case BPF_LD|BPF_IMM:
A = fentry->k;
continue;
case BPF_LDX|BPF_IMM:
X = fentry->k;
continue;
case BPF_LD|BPF_MEM:
A = mem[fentry->k];
continue;
case BPF_LDX|BPF_MEM:
X = mem[fentry->k];
continue;
case BPF_MISC|BPF_TAX:
X = A;
continue;
case BPF_MISC|BPF_TXA:
A = X;
continue;
case BPF_RET|BPF_K:
return fentry->k;
case BPF_RET|BPF_A:
return A;
case BPF_ST:
mem[fentry->k] = A;
continue;
case BPF_STX:
mem[fentry->k] = X;
continue;
default:
WARN_ON(1);
return 0;
}
/*
* Handle ancillary data, which are impossible
* (or very difficult) to get parsing packet contents.
*/
switch (k-SKF_AD_OFF) {
case SKF_AD_PROTOCOL:
A = ntohs(skb->protocol);
continue;
case SKF_AD_PKTTYPE:
A = skb->pkt_type;
continue;
case SKF_AD_IFINDEX:
A = skb->dev->ifindex;
continue;
default:
return 0;
}
}
return 0;
}
/**
* sk_chk_filter - verify socket filter code
* @filter: filter to verify
* @flen: length of filter
*
* Check the user's filter code. If we let some ugly
* filter code slip through kaboom! The filter must contain
* no references or jumps that are out of range, no illegal
* instructions, and must end with a RET instruction.
*
* All jumps are forward as they are not signed.
*
* Returns 0 if the rule set is legal or -EINVAL if not.
*/
int sk_chk_filter(struct sock_filter *filter, int flen)
{
struct sock_filter *ftest;
int pc;
if (flen == 0 || flen > BPF_MAXINSNS)
return -EINVAL;
/* check the filter code now */
for (pc = 0; pc < flen; pc++) {
ftest = &filter[pc];
/* Only allow valid instructions */
switch (ftest->code) {
case BPF_ALU|BPF_ADD|BPF_K:
case BPF_ALU|BPF_ADD|BPF_X:
case BPF_ALU|BPF_SUB|BPF_K:
case BPF_ALU|BPF_SUB|BPF_X:
case BPF_ALU|BPF_MUL|BPF_K:
case BPF_ALU|BPF_MUL|BPF_X:
case BPF_ALU|BPF_DIV|BPF_X:
case BPF_ALU|BPF_AND|BPF_K:
case BPF_ALU|BPF_AND|BPF_X:
case BPF_ALU|BPF_OR|BPF_K:
case BPF_ALU|BPF_OR|BPF_X:
case BPF_ALU|BPF_LSH|BPF_K:
case BPF_ALU|BPF_LSH|BPF_X:
case BPF_ALU|BPF_RSH|BPF_K:
case BPF_ALU|BPF_RSH|BPF_X:
case BPF_ALU|BPF_NEG:
case BPF_LD|BPF_W|BPF_ABS:
case BPF_LD|BPF_H|BPF_ABS:
case BPF_LD|BPF_B|BPF_ABS:
case BPF_LD|BPF_W|BPF_LEN:
case BPF_LD|BPF_W|BPF_IND:
case BPF_LD|BPF_H|BPF_IND:
case BPF_LD|BPF_B|BPF_IND:
case BPF_LD|BPF_IMM:
case BPF_LDX|BPF_W|BPF_LEN:
case BPF_LDX|BPF_B|BPF_MSH:
case BPF_LDX|BPF_IMM:
case BPF_MISC|BPF_TAX:
case BPF_MISC|BPF_TXA:
case BPF_RET|BPF_K:
case BPF_RET|BPF_A:
break;
/* Some instructions need special checks */
case BPF_ALU|BPF_DIV|BPF_K:
/* check for division by zero */
if (ftest->k == 0)
return -EINVAL;
break;
case BPF_LD|BPF_MEM:
case BPF_LDX|BPF_MEM:
case BPF_ST:
case BPF_STX:
/* check for invalid memory addresses */
if (ftest->k >= BPF_MEMWORDS)
return -EINVAL;
break;
case BPF_JMP|BPF_JA:
/*
* Note, the large ftest->k might cause loops.
* Compare this with conditional jumps below,
* where offsets are limited. --ANK (981016)
*/
if (ftest->k >= (unsigned)(flen-pc-1))
return -EINVAL;
break;
case BPF_JMP|BPF_JEQ|BPF_K:
case BPF_JMP|BPF_JEQ|BPF_X:
case BPF_JMP|BPF_JGE|BPF_K:
case BPF_JMP|BPF_JGE|BPF_X:
case BPF_JMP|BPF_JGT|BPF_K:
case BPF_JMP|BPF_JGT|BPF_X:
case BPF_JMP|BPF_JSET|BPF_K:
case BPF_JMP|BPF_JSET|BPF_X:
/* for conditionals both must be safe */
if (pc + ftest->jt + 1 >= flen ||
pc + ftest->jf + 1 >= flen)
return -EINVAL;
break;
default:
return -EINVAL;
}
}
return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
}
/**
* sk_attach_filter - attach a socket filter
* @fprog: the filter program
* @sk: the socket to use
*
* Attach the user's filter code. We first run some sanity checks on
* it to make sure it does not explode on us later. If an error
* occurs or there is insufficient memory for the filter a negative
* errno code is returned. On success the return is zero.
*/
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct sk_filter *fp;
unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
int err;
/* Make sure new filter is there and in the right amounts. */
if (fprog->filter == NULL)
return -EINVAL;
fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
if (!fp)
return -ENOMEM;
if (copy_from_user(fp->insns, fprog->filter, fsize)) {
sock_kfree_s(sk, fp, fsize+sizeof(*fp));
return -EFAULT;
}
atomic_set(&fp->refcnt, 1);
fp->len = fprog->len;
err = sk_chk_filter(fp->insns, fp->len);
if (!err) {
struct sk_filter *old_fp;
rcu_read_lock_bh();
old_fp = rcu_dereference(sk->sk_filter);
rcu_assign_pointer(sk->sk_filter, fp);
rcu_read_unlock_bh();
fp = old_fp;
}
if (fp)
sk_filter_release(sk, fp);
return err;
}
EXPORT_SYMBOL(sk_chk_filter);
EXPORT_SYMBOL(sk_run_filter);

373
net/core/flow.c Normal file
View File

@@ -0,0 +1,373 @@
/* flow.c: Generic flow cache.
*
* Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
* Copyright (C) 2003 David S. Miller (davem@redhat.com)
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/completion.h>
#include <linux/percpu.h>
#include <linux/bitops.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/mutex.h>
#include <net/flow.h>
#include <asm/atomic.h>
#include <asm/semaphore.h>
#include <linux/security.h>
struct flow_cache_entry {
struct flow_cache_entry *next;
u16 family;
u8 dir;
struct flowi key;
u32 genid;
void *object;
atomic_t *object_ref;
};
atomic_t flow_cache_genid = ATOMIC_INIT(0);
static u32 flow_hash_shift;
#define flow_hash_size (1 << flow_hash_shift)
static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
#define flow_table(cpu) (per_cpu(flow_tables, cpu))
static struct kmem_cache *flow_cachep __read_mostly;
static int flow_lwm, flow_hwm;
struct flow_percpu_info {
int hash_rnd_recalc;
u32 hash_rnd;
int count;
} ____cacheline_aligned;
static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
#define flow_hash_rnd_recalc(cpu) \
(per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
#define flow_hash_rnd(cpu) \
(per_cpu(flow_hash_info, cpu).hash_rnd)
#define flow_count(cpu) \
(per_cpu(flow_hash_info, cpu).count)
static struct timer_list flow_hash_rnd_timer;
#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
struct flow_flush_info {
atomic_t cpuleft;
struct completion completion;
};
static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
static void flow_cache_new_hashrnd(unsigned long arg)
{
int i;
for_each_possible_cpu(i)
flow_hash_rnd_recalc(i) = 1;
flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
add_timer(&flow_hash_rnd_timer);
}
static void flow_entry_kill(int cpu, struct flow_cache_entry *fle)
{
if (fle->object)
atomic_dec(fle->object_ref);
kmem_cache_free(flow_cachep, fle);
flow_count(cpu)--;
}
static void __flow_cache_shrink(int cpu, int shrink_to)
{
struct flow_cache_entry *fle, **flp;
int i;
for (i = 0; i < flow_hash_size; i++) {
int k = 0;
flp = &flow_table(cpu)[i];
while ((fle = *flp) != NULL && k < shrink_to) {
k++;
flp = &fle->next;
}
while ((fle = *flp) != NULL) {
*flp = fle->next;
flow_entry_kill(cpu, fle);
}
}
}
static void flow_cache_shrink(int cpu)
{
int shrink_to = flow_lwm / flow_hash_size;
__flow_cache_shrink(cpu, shrink_to);
}
static void flow_new_hash_rnd(int cpu)
{
get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
flow_hash_rnd_recalc(cpu) = 0;
__flow_cache_shrink(cpu, 0);
}
static u32 flow_hash_code(struct flowi *key, int cpu)
{
u32 *k = (u32 *) key;
return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
(flow_hash_size - 1));
}
#if (BITS_PER_LONG == 64)
typedef u64 flow_compare_t;
#else
typedef u32 flow_compare_t;
#endif
extern void flowi_is_missized(void);
/* I hear what you're saying, use memcmp. But memcmp cannot make
* important assumptions that we can here, such as alignment and
* constant size.
*/
static int flow_key_compare(struct flowi *key1, struct flowi *key2)
{
flow_compare_t *k1, *k1_lim, *k2;
const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
if (sizeof(struct flowi) % sizeof(flow_compare_t))
flowi_is_missized();
k1 = (flow_compare_t *) key1;
k1_lim = k1 + n_elem;
k2 = (flow_compare_t *) key2;
do {
if (*k1++ != *k2++)
return 1;
} while (k1 < k1_lim);
return 0;
}
void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
flow_resolve_t resolver)
{
struct flow_cache_entry *fle, **head;
unsigned int hash;
int cpu;
local_bh_disable();
cpu = smp_processor_id();
fle = NULL;
/* Packet really early in init? Making flow_cache_init a
* pre-smp initcall would solve this. --RR */
if (!flow_table(cpu))
goto nocache;
if (flow_hash_rnd_recalc(cpu))
flow_new_hash_rnd(cpu);
hash = flow_hash_code(key, cpu);
head = &flow_table(cpu)[hash];
for (fle = *head; fle; fle = fle->next) {
if (fle->family == family &&
fle->dir == dir &&
flow_key_compare(key, &fle->key) == 0) {
if (fle->genid == atomic_read(&flow_cache_genid)) {
void *ret = fle->object;
if (ret)
atomic_inc(fle->object_ref);
local_bh_enable();
return ret;
}
break;
}
}
if (!fle) {
if (flow_count(cpu) > flow_hwm)
flow_cache_shrink(cpu);
fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
if (fle) {
fle->next = *head;
*head = fle;
fle->family = family;
fle->dir = dir;
memcpy(&fle->key, key, sizeof(*key));
fle->object = NULL;
flow_count(cpu)++;
}
}
nocache:
{
int err;
void *obj;
atomic_t *obj_ref;
err = resolver(key, family, dir, &obj, &obj_ref);
if (fle && !err) {
fle->genid = atomic_read(&flow_cache_genid);
if (fle->object)
atomic_dec(fle->object_ref);
fle->object = obj;
fle->object_ref = obj_ref;
if (obj)
atomic_inc(fle->object_ref);
}
local_bh_enable();
if (err)
obj = ERR_PTR(err);
return obj;
}
}
static void flow_cache_flush_tasklet(unsigned long data)
{
struct flow_flush_info *info = (void *)data;
int i;
int cpu;
cpu = smp_processor_id();
for (i = 0; i < flow_hash_size; i++) {
struct flow_cache_entry *fle;
fle = flow_table(cpu)[i];
for (; fle; fle = fle->next) {
unsigned genid = atomic_read(&flow_cache_genid);
if (!fle->object || fle->genid == genid)
continue;
fle->object = NULL;
atomic_dec(fle->object_ref);
}
}
if (atomic_dec_and_test(&info->cpuleft))
complete(&info->completion);
}
static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
static void flow_cache_flush_per_cpu(void *data)
{
struct flow_flush_info *info = data;
int cpu;
struct tasklet_struct *tasklet;
cpu = smp_processor_id();
tasklet = flow_flush_tasklet(cpu);
tasklet->data = (unsigned long)info;
tasklet_schedule(tasklet);
}
void flow_cache_flush(void)
{
struct flow_flush_info info;
static DEFINE_MUTEX(flow_flush_sem);
/* Don't want cpus going down or up during this. */
lock_cpu_hotplug();
mutex_lock(&flow_flush_sem);
atomic_set(&info.cpuleft, num_online_cpus());
init_completion(&info.completion);
local_bh_disable();
smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0);
flow_cache_flush_tasklet((unsigned long)&info);
local_bh_enable();
wait_for_completion(&info.completion);
mutex_unlock(&flow_flush_sem);
unlock_cpu_hotplug();
}
static void __devinit flow_cache_cpu_prepare(int cpu)
{
struct tasklet_struct *tasklet;
unsigned long order;
for (order = 0;
(PAGE_SIZE << order) <
(sizeof(struct flow_cache_entry *)*flow_hash_size);
order++)
/* NOTHING */;
flow_table(cpu) = (struct flow_cache_entry **)
__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
if (!flow_table(cpu))
panic("NET: failed to allocate flow cache order %lu\n", order);
flow_hash_rnd_recalc(cpu) = 1;
flow_count(cpu) = 0;
tasklet = flow_flush_tasklet(cpu);
tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
}
static int flow_cache_cpu(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
if (action == CPU_DEAD)
__flow_cache_shrink((unsigned long)hcpu, 0);
return NOTIFY_OK;
}
static int __init flow_cache_init(void)
{
int i;
flow_cachep = kmem_cache_create("flow_cache",
sizeof(struct flow_cache_entry),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL, NULL);
flow_hash_shift = 10;
flow_lwm = 2 * flow_hash_size;
flow_hwm = 4 * flow_hash_size;
init_timer(&flow_hash_rnd_timer);
flow_hash_rnd_timer.function = flow_cache_new_hashrnd;
flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
add_timer(&flow_hash_rnd_timer);
for_each_possible_cpu(i)
flow_cache_cpu_prepare(i);
hotcpu_notifier(flow_cache_cpu, 0);
return 0;
}
module_init(flow_cache_init);
EXPORT_SYMBOL(flow_cache_genid);
EXPORT_SYMBOL(flow_cache_lookup);

249
net/core/gen_estimator.c Normal file
View File

@@ -0,0 +1,249 @@
/*
* net/sched/gen_estimator.c Simple rate estimator.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* Changes:
* Jamal Hadi Salim - moved it to net/core and reshulfed
* names to make it usable in general net subsystem.
*/
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <net/sock.h>
#include <net/gen_stats.h>
/*
This code is NOT intended to be used for statistics collection,
its purpose is to provide a base for statistical multiplexing
for controlled load service.
If you need only statistics, run a user level daemon which
periodically reads byte counters.
Unfortunately, rate estimation is not a very easy task.
F.e. I did not find a simple way to estimate the current peak rate
and even failed to formulate the problem 8)8)
So I preferred not to built an estimator into the scheduler,
but run this task separately.
Ideally, it should be kernel thread(s), but for now it runs
from timers, which puts apparent top bounds on the number of rated
flows, has minimal overhead on small, but is enough
to handle controlled load service, sets of aggregates.
We measure rate over A=(1<<interval) seconds and evaluate EWMA:
avrate = avrate*(1-W) + rate*W
where W is chosen as negative power of 2: W = 2^(-ewma_log)
The resulting time constant is:
T = A/(-ln(1-W))
NOTES.
* The stored value for avbps is scaled by 2^5, so that maximal
rate is ~1Gbit, avpps is scaled by 2^10.
* Minimal interval is HZ/4=250msec (it is the greatest common divisor
for HZ=100 and HZ=1024 8)), maximal interval
is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
are too expensive, longer ones can be implemented
at user level painlessly.
*/
#define EST_MAX_INTERVAL 5
struct gen_estimator
{
struct gen_estimator *next;
struct gnet_stats_basic *bstats;
struct gnet_stats_rate_est *rate_est;
spinlock_t *stats_lock;
unsigned interval;
int ewma_log;
u64 last_bytes;
u32 last_packets;
u32 avpps;
u32 avbps;
};
struct gen_estimator_head
{
struct timer_list timer;
struct gen_estimator *list;
};
static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
/* Estimator array lock */
static DEFINE_RWLOCK(est_lock);
static void est_timer(unsigned long arg)
{
int idx = (int)arg;
struct gen_estimator *e;
read_lock(&est_lock);
for (e = elist[idx].list; e; e = e->next) {
u64 nbytes;
u32 npackets;
u32 rate;
spin_lock(e->stats_lock);
nbytes = e->bstats->bytes;
npackets = e->bstats->packets;
rate = (nbytes - e->last_bytes)<<(7 - idx);
e->last_bytes = nbytes;
e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
e->rate_est->bps = (e->avbps+0xF)>>5;
rate = (npackets - e->last_packets)<<(12 - idx);
e->last_packets = npackets;
e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
e->rate_est->pps = (e->avpps+0x1FF)>>10;
spin_unlock(e->stats_lock);
}
mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
read_unlock(&est_lock);
}
/**
* gen_new_estimator - create a new rate estimator
* @bstats: basic statistics
* @rate_est: rate estimator statistics
* @stats_lock: statistics lock
* @opt: rate estimator configuration TLV
*
* Creates a new rate estimator with &bstats as source and &rate_est
* as destination. A new timer with the interval specified in the
* configuration TLV is created. Upon each interval, the latest statistics
* will be read from &bstats and the estimated rate will be stored in
* &rate_est with the statistics lock grabed during this period.
*
* Returns 0 on success or a negative error code.
*/
int gen_new_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt)
{
struct gen_estimator *est;
struct gnet_estimator *parm = RTA_DATA(opt);
if (RTA_PAYLOAD(opt) < sizeof(*parm))
return -EINVAL;
if (parm->interval < -2 || parm->interval > 3)
return -EINVAL;
est = kzalloc(sizeof(*est), GFP_KERNEL);
if (est == NULL)
return -ENOBUFS;
est->interval = parm->interval + 2;
est->bstats = bstats;
est->rate_est = rate_est;
est->stats_lock = stats_lock;
est->ewma_log = parm->ewma_log;
est->last_bytes = bstats->bytes;
est->avbps = rate_est->bps<<5;
est->last_packets = bstats->packets;
est->avpps = rate_est->pps<<10;
est->next = elist[est->interval].list;
if (est->next == NULL) {
init_timer(&elist[est->interval].timer);
elist[est->interval].timer.data = est->interval;
elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
elist[est->interval].timer.function = est_timer;
add_timer(&elist[est->interval].timer);
}
write_lock_bh(&est_lock);
elist[est->interval].list = est;
write_unlock_bh(&est_lock);
return 0;
}
/**
* gen_kill_estimator - remove a rate estimator
* @bstats: basic statistics
* @rate_est: rate estimator statistics
*
* Removes the rate estimator specified by &bstats and &rate_est
* and deletes the timer.
*/
void gen_kill_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est)
{
int idx;
struct gen_estimator *est, **pest;
for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
int killed = 0;
pest = &elist[idx].list;
while ((est=*pest) != NULL) {
if (est->rate_est != rate_est || est->bstats != bstats) {
pest = &est->next;
continue;
}
write_lock_bh(&est_lock);
*pest = est->next;
write_unlock_bh(&est_lock);
kfree(est);
killed++;
}
if (killed && elist[idx].list == NULL)
del_timer(&elist[idx].timer);
}
}
/**
* gen_replace_estimator - replace rate estimator configruation
* @bstats: basic statistics
* @rate_est: rate estimator statistics
* @stats_lock: statistics lock
* @opt: rate estimator configuration TLV
*
* Replaces the configuration of a rate estimator by calling
* gen_kill_estimator() and gen_new_estimator().
*
* Returns 0 on success or a negative error code.
*/
int
gen_replace_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock,
struct rtattr *opt)
{
gen_kill_estimator(bstats, rate_est);
return gen_new_estimator(bstats, rate_est, stats_lock, opt);
}
EXPORT_SYMBOL(gen_kill_estimator);
EXPORT_SYMBOL(gen_new_estimator);
EXPORT_SYMBOL(gen_replace_estimator);

239
net/core/gen_stats.c Normal file
View File

@@ -0,0 +1,239 @@
/*
* net/core/gen_stats.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Thomas Graf <tgraf@suug.ch>
* Jamal Hadi Salim
* Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* See Documentation/networking/gen_stats.txt
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/socket.h>
#include <linux/rtnetlink.h>
#include <linux/gen_stats.h>
#include <net/gen_stats.h>
static inline int
gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
{
RTA_PUT(d->skb, type, size, buf);
return 0;
rtattr_failure:
spin_unlock_bh(d->lock);
return -1;
}
/**
* gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
* @skb: socket buffer to put statistics TLVs into
* @type: TLV type for top level statistic TLV
* @tc_stats_type: TLV type for backward compatibility struct tc_stats TLV
* @xstats_type: TLV type for backward compatibility xstats TLV
* @lock: statistics lock
* @d: dumping handle
*
* Initializes the dumping handle, grabs the statistic lock and appends
* an empty TLV header to the socket buffer for use a container for all
* other statistic TLVS.
*
* The dumping handle is marked to be in backward compatibility mode telling
* all gnet_stats_copy_XXX() functions to fill a local copy of struct tc_stats.
*
* Returns 0 on success or -1 if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
int xstats_type, spinlock_t *lock, struct gnet_dump *d)
{
memset(d, 0, sizeof(*d));
spin_lock_bh(lock);
d->lock = lock;
if (type)
d->tail = (struct rtattr *) skb->tail;
d->skb = skb;
d->compat_tc_stats = tc_stats_type;
d->compat_xstats = xstats_type;
if (d->tail)
return gnet_stats_copy(d, type, NULL, 0);
return 0;
}
/**
* gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
* @skb: socket buffer to put statistics TLVs into
* @type: TLV type for top level statistic TLV
* @lock: statistics lock
* @d: dumping handle
*
* Initializes the dumping handle, grabs the statistic lock and appends
* an empty TLV header to the socket buffer for use a container for all
* other statistic TLVS.
*
* Returns 0 on success or -1 if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
struct gnet_dump *d)
{
return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
}
/**
* gnet_stats_copy_basic - copy basic statistics into statistic TLV
* @d: dumping handle
* @b: basic statistics
*
* Appends the basic statistics to the top level TLV created by
* gnet_stats_start_copy().
*
* Returns 0 on success or -1 with the statistic lock released
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b)
{
if (d->compat_tc_stats) {
d->tc_stats.bytes = b->bytes;
d->tc_stats.packets = b->packets;
}
if (d->tail)
return gnet_stats_copy(d, TCA_STATS_BASIC, b, sizeof(*b));
return 0;
}
/**
* gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
* @d: dumping handle
* @r: rate estimator statistics
*
* Appends the rate estimator statistics to the top level TLV created by
* gnet_stats_start_copy().
*
* Returns 0 on success or -1 with the statistic lock released
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r)
{
if (d->compat_tc_stats) {
d->tc_stats.bps = r->bps;
d->tc_stats.pps = r->pps;
}
if (d->tail)
return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r));
return 0;
}
/**
* gnet_stats_copy_queue - copy queue statistics into statistics TLV
* @d: dumping handle
* @q: queue statistics
*
* Appends the queue statistics to the top level TLV created by
* gnet_stats_start_copy().
*
* Returns 0 on success or -1 with the statistic lock released
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
{
if (d->compat_tc_stats) {
d->tc_stats.drops = q->drops;
d->tc_stats.qlen = q->qlen;
d->tc_stats.backlog = q->backlog;
d->tc_stats.overlimits = q->overlimits;
}
if (d->tail)
return gnet_stats_copy(d, TCA_STATS_QUEUE, q, sizeof(*q));
return 0;
}
/**
* gnet_stats_copy_app - copy application specific statistics into statistics TLV
* @d: dumping handle
* @st: application specific statistics data
* @len: length of data
*
* Appends the application sepecific statistics to the top level TLV created by
* gnet_stats_start_copy() and remembers the data for XSTATS if the dumping
* handle is in backward compatibility mode.
*
* Returns 0 on success or -1 with the statistic lock released
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
{
if (d->compat_xstats) {
d->xstats = st;
d->xstats_len = len;
}
if (d->tail)
return gnet_stats_copy(d, TCA_STATS_APP, st, len);
return 0;
}
/**
* gnet_stats_finish_copy - finish dumping procedure
* @d: dumping handle
*
* Corrects the length of the top level TLV to include all TLVs added
* by gnet_stats_copy_XXX() calls. Adds the backward compatibility TLVs
* if gnet_stats_start_copy_compat() was used and releases the statistics
* lock.
*
* Returns 0 on success or -1 with the statistic lock released
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_finish_copy(struct gnet_dump *d)
{
if (d->tail)
d->tail->rta_len = d->skb->tail - (u8 *) d->tail;
if (d->compat_tc_stats)
if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
sizeof(d->tc_stats)) < 0)
return -1;
if (d->compat_xstats && d->xstats) {
if (gnet_stats_copy(d, d->compat_xstats, d->xstats,
d->xstats_len) < 0)
return -1;
}
spin_unlock_bh(d->lock);
return 0;
}
EXPORT_SYMBOL(gnet_stats_start_copy);
EXPORT_SYMBOL(gnet_stats_start_copy_compat);
EXPORT_SYMBOL(gnet_stats_copy_basic);
EXPORT_SYMBOL(gnet_stats_copy_rate_est);
EXPORT_SYMBOL(gnet_stats_copy_queue);
EXPORT_SYMBOL(gnet_stats_copy_app);
EXPORT_SYMBOL(gnet_stats_finish_copy);

238
net/core/iovec.c Normal file
View File

@@ -0,0 +1,238 @@
/*
* iovec manipulation routines.
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Fixes:
* Andrew Lunn : Errors in iovec copying.
* Pedro Roque : Added memcpy_fromiovecend and
* csum_..._fromiovecend.
* Andi Kleen : fixed error handling for 2.1
* Alexey Kuznetsov: 2.1 optimisations
* Andi Kleen : Fix csum*fromiovecend for IPv6.
*/
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <asm/uaccess.h>
#include <asm/byteorder.h>
#include <net/checksum.h>
#include <net/sock.h>
/*
* Verify iovec. The caller must ensure that the iovec is big enough
* to hold the message iovec.
*
* Save time not doing access_ok. copy_*_user will make this work
* in any case.
*/
int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
{
int size, err, ct;
if (m->msg_namelen) {
if (mode == VERIFY_READ) {
err = move_addr_to_kernel(m->msg_name, m->msg_namelen,
address);
if (err < 0)
return err;
}
m->msg_name = address;
} else {
m->msg_name = NULL;
}
size = m->msg_iovlen * sizeof(struct iovec);
if (copy_from_user(iov, m->msg_iov, size))
return -EFAULT;
m->msg_iov = iov;
err = 0;
for (ct = 0; ct < m->msg_iovlen; ct++) {
err += iov[ct].iov_len;
/*
* Goal is not to verify user data, but to prevent returning
* negative value, which is interpreted as errno.
* Overflow is still possible, but it is harmless.
*/
if (err < 0)
return -EMSGSIZE;
}
return err;
}
/*
* Copy kernel to iovec. Returns -EFAULT on error.
*
* Note: this modifies the original iovec.
*/
int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
{
while (len > 0) {
if (iov->iov_len) {
int copy = min_t(unsigned int, iov->iov_len, len);
if (copy_to_user(iov->iov_base, kdata, copy))
return -EFAULT;
kdata += copy;
len -= copy;
iov->iov_len -= copy;
iov->iov_base += copy;
}
iov++;
}
return 0;
}
/*
* Copy iovec to kernel. Returns -EFAULT on error.
*
* Note: this modifies the original iovec.
*/
int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
{
while (len > 0) {
if (iov->iov_len) {
int copy = min_t(unsigned int, len, iov->iov_len);
if (copy_from_user(kdata, iov->iov_base, copy))
return -EFAULT;
len -= copy;
kdata += copy;
iov->iov_base += copy;
iov->iov_len -= copy;
}
iov++;
}
return 0;
}
/*
* For use with ip_build_xmit
*/
int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset,
int len)
{
/* Skip over the finished iovecs */
while (offset >= iov->iov_len) {
offset -= iov->iov_len;
iov++;
}
while (len > 0) {
u8 __user *base = iov->iov_base + offset;
int copy = min_t(unsigned int, len, iov->iov_len - offset);
offset = 0;
if (copy_from_user(kdata, base, copy))
return -EFAULT;
len -= copy;
kdata += copy;
iov++;
}
return 0;
}
/*
* And now for the all-in-one: copy and checksum from a user iovec
* directly to a datagram
* Calls to csum_partial but the last must be in 32 bit chunks
*
* ip_build_xmit must ensure that when fragmenting only the last
* call to this function will be unaligned also.
*/
int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
int offset, unsigned int len, __wsum *csump)
{
__wsum csum = *csump;
int partial_cnt = 0, err = 0;
/* Skip over the finished iovecs */
while (offset >= iov->iov_len) {
offset -= iov->iov_len;
iov++;
}
while (len > 0) {
u8 __user *base = iov->iov_base + offset;
int copy = min_t(unsigned int, len, iov->iov_len - offset);
offset = 0;
/* There is a remnant from previous iov. */
if (partial_cnt) {
int par_len = 4 - partial_cnt;
/* iov component is too short ... */
if (par_len > copy) {
if (copy_from_user(kdata, base, copy))
goto out_fault;
kdata += copy;
base += copy;
partial_cnt += copy;
len -= copy;
iov++;
if (len)
continue;
*csump = csum_partial(kdata - partial_cnt,
partial_cnt, csum);
goto out;
}
if (copy_from_user(kdata, base, par_len))
goto out_fault;
csum = csum_partial(kdata - partial_cnt, 4, csum);
kdata += par_len;
base += par_len;
copy -= par_len;
len -= par_len;
partial_cnt = 0;
}
if (len > copy) {
partial_cnt = copy % 4;
if (partial_cnt) {
copy -= partial_cnt;
if (copy_from_user(kdata + copy, base + copy,
partial_cnt))
goto out_fault;
}
}
if (copy) {
csum = csum_and_copy_from_user(base, kdata, copy,
csum, &err);
if (err)
goto out;
}
len -= copy + partial_cnt;
kdata += copy + partial_cnt;
iov++;
}
*csump = csum;
out:
return err;
out_fault:
err = -EFAULT;
goto out;
}
EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
EXPORT_SYMBOL(memcpy_fromiovec);
EXPORT_SYMBOL(memcpy_fromiovecend);
EXPORT_SYMBOL(memcpy_toiovec);

19
net/core/kmap_skb.h Normal file
View File

@@ -0,0 +1,19 @@
#include <linux/highmem.h>
static inline void *kmap_skb_frag(const skb_frag_t *frag)
{
#ifdef CONFIG_HIGHMEM
BUG_ON(in_irq());
local_bh_disable();
#endif
return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
}
static inline void kunmap_skb_frag(void *vaddr)
{
kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
#ifdef CONFIG_HIGHMEM
local_bh_enable();
#endif
}

181
net/core/link_watch.c Normal file
View File

@@ -0,0 +1,181 @@
/*
* Linux network device link state notification
*
* Author:
* Stefan Rompf <sux@loplof.de>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <asm/types.h>
enum lw_bits {
LW_RUNNING = 0,
LW_SE_USED
};
static unsigned long linkwatch_flags;
static unsigned long linkwatch_nextevent;
static void linkwatch_event(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
static LIST_HEAD(lweventlist);
static DEFINE_SPINLOCK(lweventlist_lock);
struct lw_event {
struct list_head list;
struct net_device *dev;
};
/* Avoid kmalloc() for most systems */
static struct lw_event singleevent;
static unsigned char default_operstate(const struct net_device *dev)
{
if (!netif_carrier_ok(dev))
return (dev->ifindex != dev->iflink ?
IF_OPER_LOWERLAYERDOWN : IF_OPER_DOWN);
if (netif_dormant(dev))
return IF_OPER_DORMANT;
return IF_OPER_UP;
}
static void rfc2863_policy(struct net_device *dev)
{
unsigned char operstate = default_operstate(dev);
if (operstate == dev->operstate)
return;
write_lock_bh(&dev_base_lock);
switch(dev->link_mode) {
case IF_LINK_MODE_DORMANT:
if (operstate == IF_OPER_UP)
operstate = IF_OPER_DORMANT;
break;
case IF_LINK_MODE_DEFAULT:
default:
break;
};
dev->operstate = operstate;
write_unlock_bh(&dev_base_lock);
}
/* Must be called with the rtnl semaphore held */
void linkwatch_run_queue(void)
{
struct list_head head, *n, *next;
spin_lock_irq(&lweventlist_lock);
list_replace_init(&lweventlist, &head);
spin_unlock_irq(&lweventlist_lock);
list_for_each_safe(n, next, &head) {
struct lw_event *event = list_entry(n, struct lw_event, list);
struct net_device *dev = event->dev;
if (event == &singleevent) {
clear_bit(LW_SE_USED, &linkwatch_flags);
} else {
kfree(event);
}
/* We are about to handle this device,
* so new events can be accepted
*/
clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
rfc2863_policy(dev);
if (dev->flags & IFF_UP) {
if (netif_carrier_ok(dev)) {
WARN_ON(dev->qdisc_sleeping == &noop_qdisc);
dev_activate(dev);
} else
dev_deactivate(dev);
netdev_state_change(dev);
}
dev_put(dev);
}
}
static void linkwatch_event(struct work_struct *dummy)
{
/* Limit the number of linkwatch events to one
* per second so that a runaway driver does not
* cause a storm of messages on the netlink
* socket
*/
linkwatch_nextevent = jiffies + HZ;
clear_bit(LW_RUNNING, &linkwatch_flags);
rtnl_lock();
linkwatch_run_queue();
rtnl_unlock();
}
void linkwatch_fire_event(struct net_device *dev)
{
if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
unsigned long flags;
struct lw_event *event;
if (test_and_set_bit(LW_SE_USED, &linkwatch_flags)) {
event = kmalloc(sizeof(struct lw_event), GFP_ATOMIC);
if (unlikely(event == NULL)) {
clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
return;
}
} else {
event = &singleevent;
}
dev_hold(dev);
event->dev = dev;
spin_lock_irqsave(&lweventlist_lock, flags);
list_add_tail(&event->list, &lweventlist);
spin_unlock_irqrestore(&lweventlist_lock, flags);
if (!test_and_set_bit(LW_RUNNING, &linkwatch_flags)) {
unsigned long delay = linkwatch_nextevent - jiffies;
/* If we wrap around we'll delay it by at most HZ. */
if (delay > HZ)
delay = 0;
schedule_delayed_work(&linkwatch_work, delay);
}
}
}
EXPORT_SYMBOL(linkwatch_fire_event);

2778
net/core/neighbour.c Normal file

File diff suppressed because it is too large Load Diff

493
net/core/net-sysfs.c Normal file
View File

@@ -0,0 +1,493 @@
/*
* net-sysfs.c - network device class and attributes
*
* Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/wireless.h>
#include <net/iw_handler.h>
static const char fmt_hex[] = "%#x\n";
static const char fmt_long_hex[] = "%#lx\n";
static const char fmt_dec[] = "%d\n";
static const char fmt_ulong[] = "%lu\n";
static inline int dev_isalive(const struct net_device *dev)
{
return dev->reg_state <= NETREG_REGISTERED;
}
/* use same locking rules as GIF* ioctl's */
static ssize_t netdev_show(const struct device *dev,
struct device_attribute *attr, char *buf,
ssize_t (*format)(const struct net_device *, char *))
{
struct net_device *net = to_net_dev(dev);
ssize_t ret = -EINVAL;
read_lock(&dev_base_lock);
if (dev_isalive(net))
ret = (*format)(net, buf);
read_unlock(&dev_base_lock);
return ret;
}
/* generate a show function for simple field */
#define NETDEVICE_SHOW(field, format_string) \
static ssize_t format_##field(const struct net_device *net, char *buf) \
{ \
return sprintf(buf, format_string, net->field); \
} \
static ssize_t show_##field(struct device *dev, \
struct device_attribute *attr, char *buf) \
{ \
return netdev_show(dev, attr, buf, format_##field); \
}
/* use same locking and permission rules as SIF* ioctl's */
static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len,
int (*set)(struct net_device *, unsigned long))
{
struct net_device *net = to_net_dev(dev);
char *endp;
unsigned long new;
int ret = -EINVAL;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
new = simple_strtoul(buf, &endp, 0);
if (endp == buf)
goto err;
rtnl_lock();
if (dev_isalive(net)) {
if ((ret = (*set)(net, new)) == 0)
ret = len;
}
rtnl_unlock();
err:
return ret;
}
NETDEVICE_SHOW(addr_len, fmt_dec);
NETDEVICE_SHOW(iflink, fmt_dec);
NETDEVICE_SHOW(ifindex, fmt_dec);
NETDEVICE_SHOW(features, fmt_long_hex);
NETDEVICE_SHOW(type, fmt_dec);
NETDEVICE_SHOW(link_mode, fmt_dec);
/* use same locking rules as GIFHWADDR ioctl's */
static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
{
int i;
char *cp = buf;
for (i = 0; i < len; i++)
cp += sprintf(cp, "%02x%c", addr[i],
i == (len - 1) ? '\n' : ':');
return cp - buf;
}
static ssize_t show_address(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct net_device *net = to_net_dev(dev);
ssize_t ret = -EINVAL;
read_lock(&dev_base_lock);
if (dev_isalive(net))
ret = format_addr(buf, net->dev_addr, net->addr_len);
read_unlock(&dev_base_lock);
return ret;
}
static ssize_t show_broadcast(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *net = to_net_dev(dev);
if (dev_isalive(net))
return format_addr(buf, net->broadcast, net->addr_len);
return -EINVAL;
}
static ssize_t show_carrier(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
if (netif_running(netdev)) {
return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev));
}
return -EINVAL;
}
static ssize_t show_dormant(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
if (netif_running(netdev))
return sprintf(buf, fmt_dec, !!netif_dormant(netdev));
return -EINVAL;
}
static const char *operstates[] = {
"unknown",
"notpresent", /* currently unused */
"down",
"lowerlayerdown",
"testing", /* currently unused */
"dormant",
"up"
};
static ssize_t show_operstate(struct device *dev,
struct device_attribute *attr, char *buf)
{
const struct net_device *netdev = to_net_dev(dev);
unsigned char operstate;
read_lock(&dev_base_lock);
operstate = netdev->operstate;
if (!netif_running(netdev))
operstate = IF_OPER_DOWN;
read_unlock(&dev_base_lock);
if (operstate >= ARRAY_SIZE(operstates))
return -EINVAL; /* should not happen */
return sprintf(buf, "%s\n", operstates[operstate]);
}
/* read-write attributes */
NETDEVICE_SHOW(mtu, fmt_dec);
static int change_mtu(struct net_device *net, unsigned long new_mtu)
{
return dev_set_mtu(net, (int) new_mtu);
}
static ssize_t store_mtu(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
return netdev_store(dev, attr, buf, len, change_mtu);
}
NETDEVICE_SHOW(flags, fmt_hex);
static int change_flags(struct net_device *net, unsigned long new_flags)
{
return dev_change_flags(net, (unsigned) new_flags);
}
static ssize_t store_flags(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
return netdev_store(dev, attr, buf, len, change_flags);
}
NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
{
net->tx_queue_len = new_len;
return 0;
}
static ssize_t store_tx_queue_len(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
return netdev_store(dev, attr, buf, len, change_tx_queue_len);
}
NETDEVICE_SHOW(weight, fmt_dec);
static int change_weight(struct net_device *net, unsigned long new_weight)
{
net->weight = new_weight;
return 0;
}
static ssize_t store_weight(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
return netdev_store(dev, attr, buf, len, change_weight);
}
static struct device_attribute net_class_attributes[] = {
__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
__ATTR(iflink, S_IRUGO, show_iflink, NULL),
__ATTR(ifindex, S_IRUGO, show_ifindex, NULL),
__ATTR(features, S_IRUGO, show_features, NULL),
__ATTR(type, S_IRUGO, show_type, NULL),
__ATTR(link_mode, S_IRUGO, show_link_mode, NULL),
__ATTR(address, S_IRUGO, show_address, NULL),
__ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
__ATTR(carrier, S_IRUGO, show_carrier, NULL),
__ATTR(dormant, S_IRUGO, show_dormant, NULL),
__ATTR(operstate, S_IRUGO, show_operstate, NULL),
__ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu),
__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
store_tx_queue_len),
__ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight),
{}
};
/* Show a given an attribute in the statistics group */
static ssize_t netstat_show(const struct device *d,
struct device_attribute *attr, char *buf,
unsigned long offset)
{
struct net_device *dev = to_net_dev(d);
struct net_device_stats *stats;
ssize_t ret = -EINVAL;
if (offset > sizeof(struct net_device_stats) ||
offset % sizeof(unsigned long) != 0)
WARN_ON(1);
read_lock(&dev_base_lock);
if (dev_isalive(dev) && dev->get_stats &&
(stats = (*dev->get_stats)(dev)))
ret = sprintf(buf, fmt_ulong,
*(unsigned long *)(((u8 *) stats) + offset));
read_unlock(&dev_base_lock);
return ret;
}
/* generate a read-only statistics attribute */
#define NETSTAT_ENTRY(name) \
static ssize_t show_##name(struct device *d, \
struct device_attribute *attr, char *buf) \
{ \
return netstat_show(d, attr, buf, \
offsetof(struct net_device_stats, name)); \
} \
static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
NETSTAT_ENTRY(rx_packets);
NETSTAT_ENTRY(tx_packets);
NETSTAT_ENTRY(rx_bytes);
NETSTAT_ENTRY(tx_bytes);
NETSTAT_ENTRY(rx_errors);
NETSTAT_ENTRY(tx_errors);
NETSTAT_ENTRY(rx_dropped);
NETSTAT_ENTRY(tx_dropped);
NETSTAT_ENTRY(multicast);
NETSTAT_ENTRY(collisions);
NETSTAT_ENTRY(rx_length_errors);
NETSTAT_ENTRY(rx_over_errors);
NETSTAT_ENTRY(rx_crc_errors);
NETSTAT_ENTRY(rx_frame_errors);
NETSTAT_ENTRY(rx_fifo_errors);
NETSTAT_ENTRY(rx_missed_errors);
NETSTAT_ENTRY(tx_aborted_errors);
NETSTAT_ENTRY(tx_carrier_errors);
NETSTAT_ENTRY(tx_fifo_errors);
NETSTAT_ENTRY(tx_heartbeat_errors);
NETSTAT_ENTRY(tx_window_errors);
NETSTAT_ENTRY(rx_compressed);
NETSTAT_ENTRY(tx_compressed);
static struct attribute *netstat_attrs[] = {
&dev_attr_rx_packets.attr,
&dev_attr_tx_packets.attr,
&dev_attr_rx_bytes.attr,
&dev_attr_tx_bytes.attr,
&dev_attr_rx_errors.attr,
&dev_attr_tx_errors.attr,
&dev_attr_rx_dropped.attr,
&dev_attr_tx_dropped.attr,
&dev_attr_multicast.attr,
&dev_attr_collisions.attr,
&dev_attr_rx_length_errors.attr,
&dev_attr_rx_over_errors.attr,
&dev_attr_rx_crc_errors.attr,
&dev_attr_rx_frame_errors.attr,
&dev_attr_rx_fifo_errors.attr,
&dev_attr_rx_missed_errors.attr,
&dev_attr_tx_aborted_errors.attr,
&dev_attr_tx_carrier_errors.attr,
&dev_attr_tx_fifo_errors.attr,
&dev_attr_tx_heartbeat_errors.attr,
&dev_attr_tx_window_errors.attr,
&dev_attr_rx_compressed.attr,
&dev_attr_tx_compressed.attr,
NULL
};
static struct attribute_group netstat_group = {
.name = "statistics",
.attrs = netstat_attrs,
};
#ifdef CONFIG_WIRELESS_EXT
/* helper function that does all the locking etc for wireless stats */
static ssize_t wireless_show(struct device *d, char *buf,
ssize_t (*format)(const struct iw_statistics *,
char *))
{
struct net_device *dev = to_net_dev(d);
const struct iw_statistics *iw = NULL;
ssize_t ret = -EINVAL;
read_lock(&dev_base_lock);
if (dev_isalive(dev)) {
if(dev->wireless_handlers &&
dev->wireless_handlers->get_wireless_stats)
iw = dev->wireless_handlers->get_wireless_stats(dev);
if (iw != NULL)
ret = (*format)(iw, buf);
}
read_unlock(&dev_base_lock);
return ret;
}
/* show function template for wireless fields */
#define WIRELESS_SHOW(name, field, format_string) \
static ssize_t format_iw_##name(const struct iw_statistics *iw, char *buf) \
{ \
return sprintf(buf, format_string, iw->field); \
} \
static ssize_t show_iw_##name(struct device *d, \
struct device_attribute *attr, char *buf) \
{ \
return wireless_show(d, buf, format_iw_##name); \
} \
static DEVICE_ATTR(name, S_IRUGO, show_iw_##name, NULL)
WIRELESS_SHOW(status, status, fmt_hex);
WIRELESS_SHOW(link, qual.qual, fmt_dec);
WIRELESS_SHOW(level, qual.level, fmt_dec);
WIRELESS_SHOW(noise, qual.noise, fmt_dec);
WIRELESS_SHOW(nwid, discard.nwid, fmt_dec);
WIRELESS_SHOW(crypt, discard.code, fmt_dec);
WIRELESS_SHOW(fragment, discard.fragment, fmt_dec);
WIRELESS_SHOW(misc, discard.misc, fmt_dec);
WIRELESS_SHOW(retries, discard.retries, fmt_dec);
WIRELESS_SHOW(beacon, miss.beacon, fmt_dec);
static struct attribute *wireless_attrs[] = {
&dev_attr_status.attr,
&dev_attr_link.attr,
&dev_attr_level.attr,
&dev_attr_noise.attr,
&dev_attr_nwid.attr,
&dev_attr_crypt.attr,
&dev_attr_fragment.attr,
&dev_attr_retries.attr,
&dev_attr_misc.attr,
&dev_attr_beacon.attr,
NULL
};
static struct attribute_group wireless_group = {
.name = "wireless",
.attrs = wireless_attrs,
};
#endif
#ifdef CONFIG_HOTPLUG
static int netdev_uevent(struct device *d, char **envp,
int num_envp, char *buf, int size)
{
struct net_device *dev = to_net_dev(d);
int i = 0;
int n;
/* pass interface to uevent. */
envp[i++] = buf;
n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1;
buf += n;
size -= n;
if ((size <= 0) || (i >= num_envp))
return -ENOMEM;
envp[i] = NULL;
return 0;
}
#endif
/*
* netdev_release -- destroy and free a dead device.
* Called when last reference to device kobject is gone.
*/
static void netdev_release(struct device *d)
{
struct net_device *dev = to_net_dev(d);
BUG_ON(dev->reg_state != NETREG_RELEASED);
kfree((char *)dev - dev->padded);
}
static struct class net_class = {
.name = "net",
.dev_release = netdev_release,
.dev_attrs = net_class_attributes,
#ifdef CONFIG_HOTPLUG
.dev_uevent = netdev_uevent,
#endif
};
/* Delete sysfs entries but hold kobject reference until after all
* netdev references are gone.
*/
void netdev_unregister_sysfs(struct net_device * net)
{
struct device *dev = &(net->dev);
kobject_get(&dev->kobj);
device_del(dev);
}
/* Create sysfs entries for network device. */
int netdev_register_sysfs(struct net_device *net)
{
struct device *dev = &(net->dev);
struct attribute_group **groups = net->sysfs_groups;
device_initialize(dev);
dev->class = &net_class;
dev->platform_data = net;
dev->groups = groups;
BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ);
strlcpy(dev->bus_id, net->name, BUS_ID_SIZE);
if (net->get_stats)
*groups++ = &netstat_group;
#ifdef CONFIG_WIRELESS_EXT
if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)
*groups++ = &wireless_group;
#endif
return device_add(dev);
}
int netdev_sysfs_init(void)
{
return class_register(&net_class);
}

69
net/core/netevent.c Normal file
View File

@@ -0,0 +1,69 @@
/*
* Network event notifiers
*
* Authors:
* Tom Tucker <tom@opengridcomputing.com>
* Steve Wise <swise@opengridcomputing.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Fixes:
*/
#include <linux/rtnetlink.h>
#include <linux/notifier.h>
static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain);
/**
* register_netevent_notifier - register a netevent notifier block
* @nb: notifier
*
* Register a notifier to be called when a netevent occurs.
* The notifier passed is linked into the kernel structures and must
* not be reused until it has been unregistered. A negative errno code
* is returned on a failure.
*/
int register_netevent_notifier(struct notifier_block *nb)
{
int err;
err = atomic_notifier_chain_register(&netevent_notif_chain, nb);
return err;
}
/**
* netevent_unregister_notifier - unregister a netevent notifier block
* @nb: notifier
*
* Unregister a notifier previously registered by
* register_neigh_notifier(). The notifier is unlinked into the
* kernel structures and may then be reused. A negative errno code
* is returned on a failure.
*/
int unregister_netevent_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&netevent_notif_chain, nb);
}
/**
* call_netevent_notifiers - call all netevent notifier blocks
* @val: value passed unmodified to notifier function
* @v: pointer passed unmodified to notifier function
*
* Call all neighbour notifier blocks. Parameters and return value
* are as for notifier_call_chain().
*/
int call_netevent_notifiers(unsigned long val, void *v)
{
return atomic_notifier_call_chain(&netevent_notif_chain, val, v);
}
EXPORT_SYMBOL_GPL(register_netevent_notifier);
EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
EXPORT_SYMBOL_GPL(call_netevent_notifiers);

815
net/core/netpoll.c Normal file
View File

@@ -0,0 +1,815 @@
/*
* Common framework for low-level network console, dump, and debugger code
*
* Sep 8 2003 Matt Mackall <mpm@selenic.com>
*
* based on the netconsole code from:
*
* Copyright (C) 2001 Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2002 Red Hat, Inc.
*/
#include <linux/smp_lock.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <asm/unaligned.h>
/*
* We maintain a small pool of fully-sized skbs, to make sure the
* message gets out even in extreme OOM situations.
*/
#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32
#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
static struct sk_buff_head skb_pool;
static atomic_t trapped;
#define USEC_PER_POLL 50
#define NETPOLL_RX_ENABLED 1
#define NETPOLL_RX_DROP 2
#define MAX_SKB_SIZE \
(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
sizeof(struct iphdr) + sizeof(struct ethhdr))
static void zap_completion_queue(void);
static void arp_reply(struct sk_buff *skb);
static void queue_process(struct work_struct *work)
{
struct netpoll_info *npinfo =
container_of(work, struct netpoll_info, tx_work.work);
struct sk_buff *skb;
unsigned long flags;
while ((skb = skb_dequeue(&npinfo->txq))) {
struct net_device *dev = skb->dev;
if (!netif_device_present(dev) || !netif_running(dev)) {
__kfree_skb(skb);
continue;
}
local_irq_save(flags);
netif_tx_lock(dev);
if (netif_queue_stopped(dev) ||
dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
skb_queue_head(&npinfo->txq, skb);
netif_tx_unlock(dev);
local_irq_restore(flags);
schedule_delayed_work(&npinfo->tx_work, HZ/10);
return;
}
netif_tx_unlock(dev);
local_irq_restore(flags);
}
}
static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
unsigned short ulen, __be32 saddr, __be32 daddr)
{
__wsum psum;
if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY)
return 0;
psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
if (skb->ip_summed == CHECKSUM_COMPLETE &&
!csum_fold(csum_add(psum, skb->csum)))
return 0;
skb->csum = psum;
return __skb_checksum_complete(skb);
}
/*
* Check whether delayed processing was scheduled for our NIC. If so,
* we attempt to grab the poll lock and use ->poll() to pump the card.
* If this fails, either we've recursed in ->poll() or it's already
* running on another CPU.
*
* Note: we don't mask interrupts with this lock because we're using
* trylock here and interrupts are already disabled in the softirq
* case. Further, we test the poll_owner to avoid recursion on UP
* systems where the lock doesn't exist.
*
* In cases where there is bi-directional communications, reading only
* one message at a time can lead to packets being dropped by the
* network adapter, forcing superfluous retries and possibly timeouts.
* Thus, we set our budget to greater than 1.
*/
static void poll_napi(struct netpoll *np)
{
struct netpoll_info *npinfo = np->dev->npinfo;
int budget = 16;
if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
npinfo->poll_owner != smp_processor_id() &&
spin_trylock(&npinfo->poll_lock)) {
npinfo->rx_flags |= NETPOLL_RX_DROP;
atomic_inc(&trapped);
np->dev->poll(np->dev, &budget);
atomic_dec(&trapped);
npinfo->rx_flags &= ~NETPOLL_RX_DROP;
spin_unlock(&npinfo->poll_lock);
}
}
static void service_arp_queue(struct netpoll_info *npi)
{
struct sk_buff *skb;
if (unlikely(!npi))
return;
skb = skb_dequeue(&npi->arp_tx);
while (skb != NULL) {
arp_reply(skb);
skb = skb_dequeue(&npi->arp_tx);
}
}
void netpoll_poll(struct netpoll *np)
{
if (!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
return;
/* Process pending work on NIC */
np->dev->poll_controller(np->dev);
if (np->dev->poll)
poll_napi(np);
service_arp_queue(np->dev->npinfo);
zap_completion_queue();
}
static void refill_skbs(void)
{
struct sk_buff *skb;
unsigned long flags;
spin_lock_irqsave(&skb_pool.lock, flags);
while (skb_pool.qlen < MAX_SKBS) {
skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
if (!skb)
break;
__skb_queue_tail(&skb_pool, skb);
}
spin_unlock_irqrestore(&skb_pool.lock, flags);
}
static void zap_completion_queue(void)
{
unsigned long flags;
struct softnet_data *sd = &get_cpu_var(softnet_data);
if (sd->completion_queue) {
struct sk_buff *clist;
local_irq_save(flags);
clist = sd->completion_queue;
sd->completion_queue = NULL;
local_irq_restore(flags);
while (clist != NULL) {
struct sk_buff *skb = clist;
clist = clist->next;
if (skb->destructor)
dev_kfree_skb_any(skb); /* put this one back */
else
__kfree_skb(skb);
}
}
put_cpu_var(softnet_data);
}
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
{
int count = 0;
struct sk_buff *skb;
zap_completion_queue();
refill_skbs();
repeat:
skb = alloc_skb(len, GFP_ATOMIC);
if (!skb)
skb = skb_dequeue(&skb_pool);
if (!skb) {
if (++count < 10) {
netpoll_poll(np);
goto repeat;
}
return NULL;
}
atomic_set(&skb->users, 1);
skb_reserve(skb, reserve);
return skb;
}
static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
int status = NETDEV_TX_BUSY;
unsigned long tries;
struct net_device *dev = np->dev;
struct netpoll_info *npinfo = np->dev->npinfo;
if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
__kfree_skb(skb);
return;
}
/* don't get messages out of order, and no recursion */
if (skb_queue_len(&npinfo->txq) == 0 &&
npinfo->poll_owner != smp_processor_id()) {
unsigned long flags;
local_irq_save(flags);
if (netif_tx_trylock(dev)) {
/* try until next clock tick */
for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
tries > 0; --tries) {
if (!netif_queue_stopped(dev))
status = dev->hard_start_xmit(skb, dev);
if (status == NETDEV_TX_OK)
break;
/* tickle device maybe there is some cleanup */
netpoll_poll(np);
udelay(USEC_PER_POLL);
}
netif_tx_unlock(dev);
}
local_irq_restore(flags);
}
if (status != NETDEV_TX_OK) {
skb_queue_tail(&npinfo->txq, skb);
schedule_delayed_work(&npinfo->tx_work,0);
}
}
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
int total_len, eth_len, ip_len, udp_len;
struct sk_buff *skb;
struct udphdr *udph;
struct iphdr *iph;
struct ethhdr *eth;
udp_len = len + sizeof(*udph);
ip_len = eth_len = udp_len + sizeof(*iph);
total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
skb = find_skb(np, total_len, total_len - len);
if (!skb)
return;
memcpy(skb->data, msg, len);
skb->len += len;
skb->h.uh = udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
udph->source = htons(np->local_port);
udph->dest = htons(np->remote_port);
udph->len = htons(udp_len);
udph->check = 0;
udph->check = csum_tcpudp_magic(htonl(np->local_ip),
htonl(np->remote_ip),
udp_len, IPPROTO_UDP,
csum_partial((unsigned char *)udph, udp_len, 0));
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
skb->nh.iph = iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
/* iph->version = 4; iph->ihl = 5; */
put_unaligned(0x45, (unsigned char *)iph);
iph->tos = 0;
put_unaligned(htons(ip_len), &(iph->tot_len));
iph->id = 0;
iph->frag_off = 0;
iph->ttl = 64;
iph->protocol = IPPROTO_UDP;
iph->check = 0;
put_unaligned(htonl(np->local_ip), &(iph->saddr));
put_unaligned(htonl(np->remote_ip), &(iph->daddr));
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
skb->mac.raw = skb->data;
skb->protocol = eth->h_proto = htons(ETH_P_IP);
memcpy(eth->h_source, np->local_mac, 6);
memcpy(eth->h_dest, np->remote_mac, 6);
skb->dev = np->dev;
netpoll_send_skb(np, skb);
}
static void arp_reply(struct sk_buff *skb)
{
struct netpoll_info *npinfo = skb->dev->npinfo;
struct arphdr *arp;
unsigned char *arp_ptr;
int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
__be32 sip, tip;
unsigned char *sha;
struct sk_buff *send_skb;
struct netpoll *np = NULL;
if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
np = npinfo->rx_np;
if (!np)
return;
/* No arp on this interface */
if (skb->dev->flags & IFF_NOARP)
return;
if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
(2 * skb->dev->addr_len) +
(2 * sizeof(u32)))))
return;
skb->h.raw = skb->nh.raw = skb->data;
arp = skb->nh.arph;
if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
arp->ar_pro != htons(ETH_P_IP) ||
arp->ar_op != htons(ARPOP_REQUEST))
return;
arp_ptr = (unsigned char *)(arp+1);
/* save the location of the src hw addr */
sha = arp_ptr;
arp_ptr += skb->dev->addr_len;
memcpy(&sip, arp_ptr, 4);
arp_ptr += 4;
/* if we actually cared about dst hw addr, it would get copied here */
arp_ptr += skb->dev->addr_len;
memcpy(&tip, arp_ptr, 4);
/* Should we ignore arp? */
if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip))
return;
size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
LL_RESERVED_SPACE(np->dev));
if (!send_skb)
return;
send_skb->nh.raw = send_skb->data;
arp = (struct arphdr *) skb_put(send_skb, size);
send_skb->dev = skb->dev;
send_skb->protocol = htons(ETH_P_ARP);
/* Fill the device header for the ARP frame */
if (np->dev->hard_header &&
np->dev->hard_header(send_skb, skb->dev, ptype,
sha, np->local_mac,
send_skb->len) < 0) {
kfree_skb(send_skb);
return;
}
/*
* Fill out the arp protocol part.
*
* we only support ethernet device type,
* which (according to RFC 1390) should always equal 1 (Ethernet).
*/
arp->ar_hrd = htons(np->dev->type);
arp->ar_pro = htons(ETH_P_IP);
arp->ar_hln = np->dev->addr_len;
arp->ar_pln = 4;
arp->ar_op = htons(type);
arp_ptr=(unsigned char *)(arp + 1);
memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
arp_ptr += np->dev->addr_len;
memcpy(arp_ptr, &tip, 4);
arp_ptr += 4;
memcpy(arp_ptr, sha, np->dev->addr_len);
arp_ptr += np->dev->addr_len;
memcpy(arp_ptr, &sip, 4);
netpoll_send_skb(np, send_skb);
}
int __netpoll_rx(struct sk_buff *skb)
{
int proto, len, ulen;
struct iphdr *iph;
struct udphdr *uh;
struct netpoll_info *npi = skb->dev->npinfo;
struct netpoll *np = npi->rx_np;
if (!np)
goto out;
if (skb->dev->type != ARPHRD_ETHER)
goto out;
/* check if netpoll clients need ARP */
if (skb->protocol == __constant_htons(ETH_P_ARP) &&
atomic_read(&trapped)) {
skb_queue_tail(&npi->arp_tx, skb);
return 1;
}
proto = ntohs(eth_hdr(skb)->h_proto);
if (proto != ETH_P_IP)
goto out;
if (skb->pkt_type == PACKET_OTHERHOST)
goto out;
if (skb_shared(skb))
goto out;
iph = (struct iphdr *)skb->data;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto out;
if (iph->ihl < 5 || iph->version != 4)
goto out;
if (!pskb_may_pull(skb, iph->ihl*4))
goto out;
if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
goto out;
len = ntohs(iph->tot_len);
if (skb->len < len || len < iph->ihl*4)
goto out;
/*
* Our transport medium may have padded the buffer out.
* Now We trim to the true length of the frame.
*/
if (pskb_trim_rcsum(skb, len))
goto out;
if (iph->protocol != IPPROTO_UDP)
goto out;
len -= iph->ihl*4;
uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
ulen = ntohs(uh->len);
if (ulen != len)
goto out;
if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
goto out;
if (np->local_ip && np->local_ip != ntohl(iph->daddr))
goto out;
if (np->remote_ip && np->remote_ip != ntohl(iph->saddr))
goto out;
if (np->local_port && np->local_port != ntohs(uh->dest))
goto out;
np->rx_hook(np, ntohs(uh->source),
(char *)(uh+1),
ulen - sizeof(struct udphdr));
kfree_skb(skb);
return 1;
out:
if (atomic_read(&trapped)) {
kfree_skb(skb);
return 1;
}
return 0;
}
int netpoll_parse_options(struct netpoll *np, char *opt)
{
char *cur=opt, *delim;
if (*cur != '@') {
if ((delim = strchr(cur, '@')) == NULL)
goto parse_failed;
*delim = 0;
np->local_port = simple_strtol(cur, NULL, 10);
cur = delim;
}
cur++;
printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
if (*cur != '/') {
if ((delim = strchr(cur, '/')) == NULL)
goto parse_failed;
*delim = 0;
np->local_ip = ntohl(in_aton(cur));
cur = delim;
printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
np->name, HIPQUAD(np->local_ip));
}
cur++;
if (*cur != ',') {
/* parse out dev name */
if ((delim = strchr(cur, ',')) == NULL)
goto parse_failed;
*delim = 0;
strlcpy(np->dev_name, cur, sizeof(np->dev_name));
cur = delim;
}
cur++;
printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
if (*cur != '@') {
/* dst port */
if ((delim = strchr(cur, '@')) == NULL)
goto parse_failed;
*delim = 0;
np->remote_port = simple_strtol(cur, NULL, 10);
cur = delim;
}
cur++;
printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
/* dst ip */
if ((delim = strchr(cur, '/')) == NULL)
goto parse_failed;
*delim = 0;
np->remote_ip = ntohl(in_aton(cur));
cur = delim + 1;
printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
np->name, HIPQUAD(np->remote_ip));
if (*cur != 0) {
/* MAC address */
if ((delim = strchr(cur, ':')) == NULL)
goto parse_failed;
*delim = 0;
np->remote_mac[0] = simple_strtol(cur, NULL, 16);
cur = delim + 1;
if ((delim = strchr(cur, ':')) == NULL)
goto parse_failed;
*delim = 0;
np->remote_mac[1] = simple_strtol(cur, NULL, 16);
cur = delim + 1;
if ((delim = strchr(cur, ':')) == NULL)
goto parse_failed;
*delim = 0;
np->remote_mac[2] = simple_strtol(cur, NULL, 16);
cur = delim + 1;
if ((delim = strchr(cur, ':')) == NULL)
goto parse_failed;
*delim = 0;
np->remote_mac[3] = simple_strtol(cur, NULL, 16);
cur = delim + 1;
if ((delim = strchr(cur, ':')) == NULL)
goto parse_failed;
*delim = 0;
np->remote_mac[4] = simple_strtol(cur, NULL, 16);
cur = delim + 1;
np->remote_mac[5] = simple_strtol(cur, NULL, 16);
}
printk(KERN_INFO "%s: remote ethernet address "
"%02x:%02x:%02x:%02x:%02x:%02x\n",
np->name,
np->remote_mac[0],
np->remote_mac[1],
np->remote_mac[2],
np->remote_mac[3],
np->remote_mac[4],
np->remote_mac[5]);
return 0;
parse_failed:
printk(KERN_INFO "%s: couldn't parse config at %s!\n",
np->name, cur);
return -1;
}
int netpoll_setup(struct netpoll *np)
{
struct net_device *ndev = NULL;
struct in_device *in_dev;
struct netpoll_info *npinfo;
unsigned long flags;
int err;
if (np->dev_name)
ndev = dev_get_by_name(np->dev_name);
if (!ndev) {
printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
np->name, np->dev_name);
return -ENODEV;
}
np->dev = ndev;
if (!ndev->npinfo) {
npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
if (!npinfo) {
err = -ENOMEM;
goto release;
}
npinfo->rx_flags = 0;
npinfo->rx_np = NULL;
spin_lock_init(&npinfo->poll_lock);
npinfo->poll_owner = -1;
spin_lock_init(&npinfo->rx_lock);
skb_queue_head_init(&npinfo->arp_tx);
skb_queue_head_init(&npinfo->txq);
INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
atomic_set(&npinfo->refcnt, 1);
} else {
npinfo = ndev->npinfo;
atomic_inc(&npinfo->refcnt);
}
if (!ndev->poll_controller) {
printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
np->name, np->dev_name);
err = -ENOTSUPP;
goto release;
}
if (!netif_running(ndev)) {
unsigned long atmost, atleast;
printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
np->name, np->dev_name);
rtnl_lock();
err = dev_open(ndev);
rtnl_unlock();
if (err) {
printk(KERN_ERR "%s: failed to open %s\n",
np->name, ndev->name);
goto release;
}
atleast = jiffies + HZ/10;
atmost = jiffies + 4*HZ;
while (!netif_carrier_ok(ndev)) {
if (time_after(jiffies, atmost)) {
printk(KERN_NOTICE
"%s: timeout waiting for carrier\n",
np->name);
break;
}
cond_resched();
}
/* If carrier appears to come up instantly, we don't
* trust it and pause so that we don't pump all our
* queued console messages into the bitbucket.
*/
if (time_before(jiffies, atleast)) {
printk(KERN_NOTICE "%s: carrier detect appears"
" untrustworthy, waiting 4 seconds\n",
np->name);
msleep(4000);
}
}
if (is_zero_ether_addr(np->local_mac) && ndev->dev_addr)
memcpy(np->local_mac, ndev->dev_addr, 6);
if (!np->local_ip) {
rcu_read_lock();
in_dev = __in_dev_get_rcu(ndev);
if (!in_dev || !in_dev->ifa_list) {
rcu_read_unlock();
printk(KERN_ERR "%s: no IP address for %s, aborting\n",
np->name, np->dev_name);
err = -EDESTADDRREQ;
goto release;
}
np->local_ip = ntohl(in_dev->ifa_list->ifa_local);
rcu_read_unlock();
printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
np->name, HIPQUAD(np->local_ip));
}
if (np->rx_hook) {
spin_lock_irqsave(&npinfo->rx_lock, flags);
npinfo->rx_flags |= NETPOLL_RX_ENABLED;
npinfo->rx_np = np;
spin_unlock_irqrestore(&npinfo->rx_lock, flags);
}
/* fill up the skb queue */
refill_skbs();
/* last thing to do is link it to the net device structure */
ndev->npinfo = npinfo;
/* avoid racing with NAPI reading npinfo */
synchronize_rcu();
return 0;
release:
if (!ndev->npinfo)
kfree(npinfo);
np->dev = NULL;
dev_put(ndev);
return err;
}
static int __init netpoll_init(void)
{
skb_queue_head_init(&skb_pool);
return 0;
}
core_initcall(netpoll_init);
void netpoll_cleanup(struct netpoll *np)
{
struct netpoll_info *npinfo;
unsigned long flags;
if (np->dev) {
npinfo = np->dev->npinfo;
if (npinfo) {
if (npinfo->rx_np == np) {
spin_lock_irqsave(&npinfo->rx_lock, flags);
npinfo->rx_np = NULL;
npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
spin_unlock_irqrestore(&npinfo->rx_lock, flags);
}
np->dev->npinfo = NULL;
if (atomic_dec_and_test(&npinfo->refcnt)) {
skb_queue_purge(&npinfo->arp_tx);
skb_queue_purge(&npinfo->txq);
cancel_rearming_delayed_work(&npinfo->tx_work);
flush_scheduled_work();
kfree(npinfo);
}
}
dev_put(np->dev);
}
np->dev = NULL;
}
int netpoll_trap(void)
{
return atomic_read(&trapped);
}
void netpoll_set_trap(int trap)
{
if (trap)
atomic_inc(&trapped);
else
atomic_dec(&trapped);
}
EXPORT_SYMBOL(netpoll_set_trap);
EXPORT_SYMBOL(netpoll_trap);
EXPORT_SYMBOL(netpoll_parse_options);
EXPORT_SYMBOL(netpoll_setup);
EXPORT_SYMBOL(netpoll_cleanup);
EXPORT_SYMBOL(netpoll_send_udp);
EXPORT_SYMBOL(netpoll_poll);

3642
net/core/pktgen.c Normal file

File diff suppressed because it is too large Load Diff

102
net/core/request_sock.c Normal file
View File

@@ -0,0 +1,102 @@
/*
* NET Generic infrastructure for Network protocols.
*
* Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* From code originally in include/net/tcp.h
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <net/request_sock.h>
/*
* Maximum number of SYN_RECV sockets in queue per LISTEN socket.
* One SYN_RECV socket costs about 80bytes on a 32bit machine.
* It would be better to replace it with a global counter for all sockets
* but then some measure against one socket starving all other sockets
* would be needed.
*
* It was 128 by default. Experiments with real servers show, that
* it is absolutely not enough even at 100conn/sec. 256 cures most
* of problems. This value is adjusted to 128 for very small machines
* (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
* Note : Dont forget somaxconn that may limit backlog too.
*/
int sysctl_max_syn_backlog = 256;
int reqsk_queue_alloc(struct request_sock_queue *queue,
unsigned int nr_table_entries)
{
size_t lopt_size = sizeof(struct listen_sock);
struct listen_sock *lopt;
nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
nr_table_entries = max_t(u32, nr_table_entries, 8);
nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
lopt_size += nr_table_entries * sizeof(struct request_sock *);
if (lopt_size > PAGE_SIZE)
lopt = __vmalloc(lopt_size,
GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
PAGE_KERNEL);
else
lopt = kzalloc(lopt_size, GFP_KERNEL);
if (lopt == NULL)
return -ENOMEM;
for (lopt->max_qlen_log = 3;
(1 << lopt->max_qlen_log) < nr_table_entries;
lopt->max_qlen_log++);
get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
rwlock_init(&queue->syn_wait_lock);
queue->rskq_accept_head = NULL;
lopt->nr_table_entries = nr_table_entries;
write_lock_bh(&queue->syn_wait_lock);
queue->listen_opt = lopt;
write_unlock_bh(&queue->syn_wait_lock);
return 0;
}
EXPORT_SYMBOL(reqsk_queue_alloc);
void reqsk_queue_destroy(struct request_sock_queue *queue)
{
/* make all the listen_opt local to us */
struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
size_t lopt_size = sizeof(struct listen_sock) +
lopt->nr_table_entries * sizeof(struct request_sock *);
if (lopt->qlen != 0) {
unsigned int i;
for (i = 0; i < lopt->nr_table_entries; i++) {
struct request_sock *req;
while ((req = lopt->syn_table[i]) != NULL) {
lopt->syn_table[i] = req->dl_next;
lopt->qlen--;
reqsk_free(req);
}
}
}
BUG_TRAP(lopt->qlen == 0);
if (lopt_size > PAGE_SIZE)
vfree(lopt);
else
kfree(lopt);
}
EXPORT_SYMBOL(reqsk_queue_destroy);

896
net/core/rtnetlink.c Normal file
View File

@@ -0,0 +1,896 @@
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Routing netlink socket interface: protocol independent part.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Fixes:
* Vitaly E. Lavrov RTA_OK arithmetics was wrong.
*/
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/security.h>
#include <linux/mutex.h>
#include <linux/if_addr.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/string.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/udp.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/fib_rules.h>
#include <net/netlink.h>
#ifdef CONFIG_NET_WIRELESS_RTNETLINK
#include <linux/wireless.h>
#include <net/iw_handler.h>
#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
static DEFINE_MUTEX(rtnl_mutex);
static struct sock *rtnl;
void rtnl_lock(void)
{
mutex_lock(&rtnl_mutex);
}
void __rtnl_unlock(void)
{
mutex_unlock(&rtnl_mutex);
}
void rtnl_unlock(void)
{
mutex_unlock(&rtnl_mutex);
if (rtnl && rtnl->sk_receive_queue.qlen)
rtnl->sk_data_ready(rtnl, 0);
netdev_run_todo();
}
int rtnl_trylock(void)
{
return mutex_trylock(&rtnl_mutex);
}
int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
{
memset(tb, 0, sizeof(struct rtattr*)*maxattr);
while (RTA_OK(rta, len)) {
unsigned flavor = rta->rta_type;
if (flavor && flavor <= maxattr)
tb[flavor-1] = rta;
rta = RTA_NEXT(rta, len);
}
return 0;
}
struct rtnetlink_link * rtnetlink_links[NPROTO];
static const int rtm_min[RTM_NR_FAMILIES] =
{
[RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
[RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
[RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)),
[RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)),
[RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
[RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
[RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
[RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)),
[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
[RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
};
static const int rta_max[RTM_NR_FAMILIES] =
{
[RTM_FAM(RTM_NEWLINK)] = IFLA_MAX,
[RTM_FAM(RTM_NEWADDR)] = IFA_MAX,
[RTM_FAM(RTM_NEWROUTE)] = RTA_MAX,
[RTM_FAM(RTM_NEWRULE)] = FRA_MAX,
[RTM_FAM(RTM_NEWQDISC)] = TCA_MAX,
[RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX,
[RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX,
[RTM_FAM(RTM_NEWACTION)] = TCAA_MAX,
};
void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
{
struct rtattr *rta;
int size = RTA_LENGTH(attrlen);
rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size));
rta->rta_type = attrtype;
rta->rta_len = size;
memcpy(RTA_DATA(rta), data, attrlen);
memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size);
}
size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size)
{
size_t ret = RTA_PAYLOAD(rta);
char *src = RTA_DATA(rta);
if (ret > 0 && src[ret - 1] == '\0')
ret--;
if (size > 0) {
size_t len = (ret >= size) ? size - 1 : ret;
memset(dest, 0, size);
memcpy(dest, src, len);
}
return ret;
}
int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
{
int err = 0;
NETLINK_CB(skb).dst_group = group;
if (echo)
atomic_inc(&skb->users);
netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
if (echo)
err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
return err;
}
int rtnl_unicast(struct sk_buff *skb, u32 pid)
{
return nlmsg_unicast(rtnl, skb, pid);
}
int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
struct nlmsghdr *nlh, gfp_t flags)
{
int report = 0;
if (nlh)
report = nlmsg_report(nlh);
return nlmsg_notify(rtnl, skb, pid, group, report, flags);
}
void rtnl_set_sk_err(u32 group, int error)
{
netlink_set_err(rtnl, 0, group, error);
}
int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
{
struct nlattr *mx;
int i, valid = 0;
mx = nla_nest_start(skb, RTA_METRICS);
if (mx == NULL)
return -ENOBUFS;
for (i = 0; i < RTAX_MAX; i++) {
if (metrics[i]) {
valid++;
NLA_PUT_U32(skb, i+1, metrics[i]);
}
}
if (!valid) {
nla_nest_cancel(skb, mx);
return 0;
}
return nla_nest_end(skb, mx);
nla_put_failure:
return nla_nest_cancel(skb, mx);
}
int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
u32 ts, u32 tsage, long expires, u32 error)
{
struct rta_cacheinfo ci = {
.rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse),
.rta_used = dst->__use,
.rta_clntref = atomic_read(&(dst->__refcnt)),
.rta_error = error,
.rta_id = id,
.rta_ts = ts,
.rta_tsage = tsage,
};
if (expires)
ci.rta_expires = jiffies_to_clock_t(expires);
return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
}
EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
static void set_operstate(struct net_device *dev, unsigned char transition)
{
unsigned char operstate = dev->operstate;
switch(transition) {
case IF_OPER_UP:
if ((operstate == IF_OPER_DORMANT ||
operstate == IF_OPER_UNKNOWN) &&
!netif_dormant(dev))
operstate = IF_OPER_UP;
break;
case IF_OPER_DORMANT:
if (operstate == IF_OPER_UP ||
operstate == IF_OPER_UNKNOWN)
operstate = IF_OPER_DORMANT;
break;
};
if (dev->operstate != operstate) {
write_lock_bh(&dev_base_lock);
dev->operstate = operstate;
write_unlock_bh(&dev_base_lock);
netdev_state_change(dev);
}
}
static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
struct net_device_stats *b)
{
a->rx_packets = b->rx_packets;
a->tx_packets = b->tx_packets;
a->rx_bytes = b->rx_bytes;
a->tx_bytes = b->tx_bytes;
a->rx_errors = b->rx_errors;
a->tx_errors = b->tx_errors;
a->rx_dropped = b->rx_dropped;
a->tx_dropped = b->tx_dropped;
a->multicast = b->multicast;
a->collisions = b->collisions;
a->rx_length_errors = b->rx_length_errors;
a->rx_over_errors = b->rx_over_errors;
a->rx_crc_errors = b->rx_crc_errors;
a->rx_frame_errors = b->rx_frame_errors;
a->rx_fifo_errors = b->rx_fifo_errors;
a->rx_missed_errors = b->rx_missed_errors;
a->tx_aborted_errors = b->tx_aborted_errors;
a->tx_carrier_errors = b->tx_carrier_errors;
a->tx_fifo_errors = b->tx_fifo_errors;
a->tx_heartbeat_errors = b->tx_heartbeat_errors;
a->tx_window_errors = b->tx_window_errors;
a->rx_compressed = b->rx_compressed;
a->tx_compressed = b->tx_compressed;
};
static inline size_t if_nlmsg_size(int iwbuflen)
{
return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+ nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+ nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
+ nla_total_size(sizeof(struct rtnl_link_ifmap))
+ nla_total_size(sizeof(struct rtnl_link_stats))
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
+ nla_total_size(4) /* IFLA_TXQLEN */
+ nla_total_size(4) /* IFLA_WEIGHT */
+ nla_total_size(4) /* IFLA_MTU */
+ nla_total_size(4) /* IFLA_LINK */
+ nla_total_size(4) /* IFLA_MASTER */
+ nla_total_size(1) /* IFLA_OPERSTATE */
+ nla_total_size(1) /* IFLA_LINKMODE */
+ nla_total_size(iwbuflen);
}
static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
void *iwbuf, int iwbuflen, int type, u32 pid,
u32 seq, u32 change, unsigned int flags)
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
if (nlh == NULL)
return -EMSGSIZE;
ifm = nlmsg_data(nlh);
ifm->ifi_family = AF_UNSPEC;
ifm->__ifi_pad = 0;
ifm->ifi_type = dev->type;
ifm->ifi_index = dev->ifindex;
ifm->ifi_flags = dev_get_flags(dev);
ifm->ifi_change = change;
NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len);
NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight);
NLA_PUT_U8(skb, IFLA_OPERSTATE,
netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
if (dev->ifindex != dev->iflink)
NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
if (dev->master)
NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
if (dev->qdisc_sleeping)
NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id);
if (1) {
struct rtnl_link_ifmap map = {
.mem_start = dev->mem_start,
.mem_end = dev->mem_end,
.base_addr = dev->base_addr,
.irq = dev->irq,
.dma = dev->dma,
.port = dev->if_port,
};
NLA_PUT(skb, IFLA_MAP, sizeof(map), &map);
}
if (dev->addr_len) {
NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
}
if (dev->get_stats) {
struct net_device_stats *stats = dev->get_stats(dev);
if (stats) {
struct nlattr *attr;
attr = nla_reserve(skb, IFLA_STATS,
sizeof(struct rtnl_link_stats));
if (attr == NULL)
goto nla_put_failure;
copy_rtnl_link_stats(nla_data(attr), stats);
}
}
if (iwbuf)
NLA_PUT(skb, IFLA_WIRELESS, iwbuflen, iwbuf);
return nlmsg_end(skb, nlh);
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
int s_idx = cb->args[0];
struct net_device *dev;
read_lock(&dev_base_lock);
for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
if (idx < s_idx)
continue;
if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
break;
}
read_unlock(&dev_base_lock);
cb->args[0] = idx;
return skb->len;
}
static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = {
[IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 },
[IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) },
[IFLA_MTU] = { .type = NLA_U32 },
[IFLA_TXQLEN] = { .type = NLA_U32 },
[IFLA_WEIGHT] = { .type = NLA_U32 },
[IFLA_OPERSTATE] = { .type = NLA_U8 },
[IFLA_LINKMODE] = { .type = NLA_U8 },
};
static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct ifinfomsg *ifm;
struct net_device *dev;
int err, send_addr_notify = 0, modified = 0;
struct nlattr *tb[IFLA_MAX+1];
char ifname[IFNAMSIZ];
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
if (err < 0)
goto errout;
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
ifname[0] = '\0';
err = -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index >= 0)
dev = dev_get_by_index(ifm->ifi_index);
else if (tb[IFLA_IFNAME])
dev = dev_get_by_name(ifname);
else
goto errout;
if (dev == NULL) {
err = -ENODEV;
goto errout;
}
if (tb[IFLA_ADDRESS] &&
nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
goto errout_dev;
if (tb[IFLA_BROADCAST] &&
nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
goto errout_dev;
if (tb[IFLA_MAP]) {
struct rtnl_link_ifmap *u_map;
struct ifmap k_map;
if (!dev->set_config) {
err = -EOPNOTSUPP;
goto errout_dev;
}
if (!netif_device_present(dev)) {
err = -ENODEV;
goto errout_dev;
}
u_map = nla_data(tb[IFLA_MAP]);
k_map.mem_start = (unsigned long) u_map->mem_start;
k_map.mem_end = (unsigned long) u_map->mem_end;
k_map.base_addr = (unsigned short) u_map->base_addr;
k_map.irq = (unsigned char) u_map->irq;
k_map.dma = (unsigned char) u_map->dma;
k_map.port = (unsigned char) u_map->port;
err = dev->set_config(dev, &k_map);
if (err < 0)
goto errout_dev;
modified = 1;
}
if (tb[IFLA_ADDRESS]) {
struct sockaddr *sa;
int len;
if (!dev->set_mac_address) {
err = -EOPNOTSUPP;
goto errout_dev;
}
if (!netif_device_present(dev)) {
err = -ENODEV;
goto errout_dev;
}
len = sizeof(sa_family_t) + dev->addr_len;
sa = kmalloc(len, GFP_KERNEL);
if (!sa) {
err = -ENOMEM;
goto errout_dev;
}
sa->sa_family = dev->type;
memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
dev->addr_len);
err = dev->set_mac_address(dev, sa);
kfree(sa);
if (err)
goto errout_dev;
send_addr_notify = 1;
modified = 1;
}
if (tb[IFLA_MTU]) {
err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
if (err < 0)
goto errout_dev;
modified = 1;
}
/*
* Interface selected by interface index but interface
* name provided implies that a name change has been
* requested.
*/
if (ifm->ifi_index >= 0 && ifname[0]) {
err = dev_change_name(dev, ifname);
if (err < 0)
goto errout_dev;
modified = 1;
}
#ifdef CONFIG_NET_WIRELESS_RTNETLINK
if (tb[IFLA_WIRELESS]) {
/* Call Wireless Extensions.
* Various stuff checked in there... */
err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]),
nla_len(tb[IFLA_WIRELESS]));
if (err < 0)
goto errout_dev;
}
#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
if (tb[IFLA_BROADCAST]) {
nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
send_addr_notify = 1;
}
if (ifm->ifi_flags)
dev_change_flags(dev, ifm->ifi_flags);
if (tb[IFLA_TXQLEN])
dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
if (tb[IFLA_WEIGHT])
dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
if (tb[IFLA_OPERSTATE])
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
if (tb[IFLA_LINKMODE]) {
write_lock_bh(&dev_base_lock);
dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
write_unlock_bh(&dev_base_lock);
}
err = 0;
errout_dev:
if (err < 0 && modified && net_ratelimit())
printk(KERN_WARNING "A link change request failed with "
"some changes comitted already. Interface %s may "
"have been left with an inconsistent configuration, "
"please check.\n", dev->name);
if (send_addr_notify)
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
dev_put(dev);
errout:
return err;
}
static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
struct ifinfomsg *ifm;
struct nlattr *tb[IFLA_MAX+1];
struct net_device *dev = NULL;
struct sk_buff *nskb;
char *iw_buf = NULL, *iw = NULL;
int iw_buf_len = 0;
int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
if (err < 0)
return err;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index >= 0) {
dev = dev_get_by_index(ifm->ifi_index);
if (dev == NULL)
return -ENODEV;
} else
return -EINVAL;
#ifdef CONFIG_NET_WIRELESS_RTNETLINK
if (tb[IFLA_WIRELESS]) {
/* Call Wireless Extensions. We need to know the size before
* we can alloc. Various stuff checked in there... */
err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]),
nla_len(tb[IFLA_WIRELESS]),
&iw_buf, &iw_buf_len);
if (err < 0)
goto errout;
/* Payload is at an offset in buffer */
iw = iw_buf + IW_EV_POINT_OFF;
}
#endif /* CONFIG_NET_WIRELESS_RTNETLINK */
nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL);
if (nskb == NULL) {
err = -ENOBUFS;
goto errout;
}
err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK,
NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size */
WARN_ON(err == -EMSGSIZE);
kfree_skb(nskb);
goto errout;
}
err = rtnl_unicast(nskb, NETLINK_CB(skb).pid);
errout:
kfree(iw_buf);
dev_put(dev);
return err;
}
static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
int s_idx = cb->family;
if (s_idx == 0)
s_idx = 1;
for (idx=1; idx<NPROTO; idx++) {
int type = cb->nlh->nlmsg_type-RTM_BASE;
if (idx < s_idx || idx == PF_PACKET)
continue;
if (rtnetlink_links[idx] == NULL ||
rtnetlink_links[idx][type].dumpit == NULL)
continue;
if (idx > s_idx)
memset(&cb->args[0], 0, sizeof(cb->args));
if (rtnetlink_links[idx][type].dumpit(skb, cb))
break;
}
cb->family = idx;
return skb->len;
}
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
{
struct sk_buff *skb;
int err = -ENOBUFS;
skb = nlmsg_new(if_nlmsg_size(0), GFP_KERNEL);
if (skb == NULL)
goto errout;
err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
goto errout;
}
err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
errout:
if (err < 0)
rtnl_set_sk_err(RTNLGRP_LINK, err);
}
/* Protected by RTNL sempahore. */
static struct rtattr **rta_buf;
static int rtattr_max;
/* Process one rtnetlink message. */
static __inline__ int
rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
{
struct rtnetlink_link *link;
struct rtnetlink_link *link_tab;
int sz_idx, kind;
int min_len;
int family;
int type;
int err;
/* Only requests are handled by kernel now */
if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
return 0;
type = nlh->nlmsg_type;
/* A control message: ignore them */
if (type < RTM_BASE)
return 0;
/* Unknown message: reply with EINVAL */
if (type > RTM_MAX)
goto err_inval;
type -= RTM_BASE;
/* All the messages must have at least 1 byte length */
if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg)))
return 0;
family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
if (family >= NPROTO) {
*errp = -EAFNOSUPPORT;
return -1;
}
link_tab = rtnetlink_links[family];
if (link_tab == NULL)
link_tab = rtnetlink_links[PF_UNSPEC];
link = &link_tab[type];
sz_idx = type>>2;
kind = type&3;
if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) {
*errp = -EPERM;
return -1;
}
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
if (link->dumpit == NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
if (link->dumpit == NULL)
goto err_inval;
if ((*errp = netlink_dump_start(rtnl, skb, nlh,
link->dumpit, NULL)) != 0) {
return -1;
}
netlink_queue_skip(nlh, skb);
return -1;
}
memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
min_len = rtm_min[sz_idx];
if (nlh->nlmsg_len < min_len)
goto err_inval;
if (nlh->nlmsg_len > min_len) {
int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len);
while (RTA_OK(attr, attrlen)) {
unsigned flavor = attr->rta_type;
if (flavor) {
if (flavor > rta_max[sz_idx])
goto err_inval;
rta_buf[flavor-1] = attr;
}
attr = RTA_NEXT(attr, attrlen);
}
}
if (link->doit == NULL)
link = &(rtnetlink_links[PF_UNSPEC][type]);
if (link->doit == NULL)
goto err_inval;
err = link->doit(skb, nlh, (void *)&rta_buf[0]);
*errp = err;
return err;
err_inval:
*errp = -EINVAL;
return -1;
}
static void rtnetlink_rcv(struct sock *sk, int len)
{
unsigned int qlen = 0;
do {
mutex_lock(&rtnl_mutex);
netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg);
mutex_unlock(&rtnl_mutex);
netdev_run_todo();
} while (qlen);
}
static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
{
[RTM_GETLINK - RTM_BASE] = { .doit = rtnl_getlink,
.dumpit = rtnl_dump_ifinfo },
[RTM_SETLINK - RTM_BASE] = { .doit = rtnl_setlink },
[RTM_GETADDR - RTM_BASE] = { .dumpit = rtnl_dump_all },
[RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnl_dump_all },
[RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add },
[RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete },
[RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info },
#ifdef CONFIG_FIB_RULES
[RTM_NEWRULE - RTM_BASE] = { .doit = fib_nl_newrule },
[RTM_DELRULE - RTM_BASE] = { .doit = fib_nl_delrule },
#endif
[RTM_GETRULE - RTM_BASE] = { .dumpit = rtnl_dump_all },
[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info },
[RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set },
};
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
switch (event) {
case NETDEV_UNREGISTER:
rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
break;
case NETDEV_REGISTER:
rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
break;
case NETDEV_UP:
case NETDEV_DOWN:
rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
break;
case NETDEV_CHANGE:
case NETDEV_GOING_DOWN:
break;
default:
rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
break;
}
return NOTIFY_DONE;
}
static struct notifier_block rtnetlink_dev_notifier = {
.notifier_call = rtnetlink_event,
};
void __init rtnetlink_init(void)
{
int i;
rtattr_max = 0;
for (i = 0; i < ARRAY_SIZE(rta_max); i++)
if (rta_max[i] > rtattr_max)
rtattr_max = rta_max[i];
rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
if (!rta_buf)
panic("rtnetlink_init: cannot allocate rta_buf\n");
rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
THIS_MODULE);
if (rtnl == NULL)
panic("rtnetlink_init: cannot initialize rtnetlink\n");
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
register_netdevice_notifier(&rtnetlink_dev_notifier);
rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
}
EXPORT_SYMBOL(__rta_fill);
EXPORT_SYMBOL(rtattr_strlcpy);
EXPORT_SYMBOL(rtattr_parse);
EXPORT_SYMBOL(rtnetlink_links);
EXPORT_SYMBOL(rtnetlink_put_metrics);
EXPORT_SYMBOL(rtnl_lock);
EXPORT_SYMBOL(rtnl_trylock);
EXPORT_SYMBOL(rtnl_unlock);
EXPORT_SYMBOL(rtnl_unicast);
EXPORT_SYMBOL(rtnl_notify);
EXPORT_SYMBOL(rtnl_set_sk_err);

290
net/core/scm.c Normal file
View File

@@ -0,0 +1,290 @@
/* scm.c - Socket level control messages processing.
*
* Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
* Alignment and value checking mods by Craig Metz
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/net.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/security.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/compat.h>
#include <net/scm.h>
/*
* Only allow a user to send credentials, that they could set with
* setu(g)id.
*/
static __inline__ int scm_check_creds(struct ucred *creds)
{
if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) &&
((creds->uid == current->uid || creds->uid == current->euid ||
creds->uid == current->suid) || capable(CAP_SETUID)) &&
((creds->gid == current->gid || creds->gid == current->egid ||
creds->gid == current->sgid) || capable(CAP_SETGID))) {
return 0;
}
return -EPERM;
}
static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
{
int *fdp = (int*)CMSG_DATA(cmsg);
struct scm_fp_list *fpl = *fplp;
struct file **fpp;
int i, num;
num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int);
if (num <= 0)
return 0;
if (num > SCM_MAX_FD)
return -EINVAL;
if (!fpl)
{
fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
if (!fpl)
return -ENOMEM;
*fplp = fpl;
fpl->count = 0;
}
fpp = &fpl->fp[fpl->count];
if (fpl->count + num > SCM_MAX_FD)
return -EINVAL;
/*
* Verify the descriptors and increment the usage count.
*/
for (i=0; i< num; i++)
{
int fd = fdp[i];
struct file *file;
if (fd < 0 || !(file = fget(fd)))
return -EBADF;
*fpp++ = file;
fpl->count++;
}
return num;
}
void __scm_destroy(struct scm_cookie *scm)
{
struct scm_fp_list *fpl = scm->fp;
int i;
if (fpl) {
scm->fp = NULL;
for (i=fpl->count-1; i>=0; i--)
fput(fpl->fp[i]);
kfree(fpl);
}
}
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{
struct cmsghdr *cmsg;
int err;
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg))
{
err = -EINVAL;
/* Verify that cmsg_len is at least sizeof(struct cmsghdr) */
/* The first check was omitted in <= 2.2.5. The reasoning was
that parser checks cmsg_len in any case, so that
additional check would be work duplication.
But if cmsg_level is not SOL_SOCKET, we do not check
for too short ancillary data object at all! Oops.
OK, let's add it...
*/
if (!CMSG_OK(msg, cmsg))
goto error;
if (cmsg->cmsg_level != SOL_SOCKET)
continue;
switch (cmsg->cmsg_type)
{
case SCM_RIGHTS:
err=scm_fp_copy(cmsg, &p->fp);
if (err<0)
goto error;
break;
case SCM_CREDENTIALS:
if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
goto error;
memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
err = scm_check_creds(&p->creds);
if (err)
goto error;
break;
default:
goto error;
}
}
if (p->fp && !p->fp->count)
{
kfree(p->fp);
p->fp = NULL;
}
return 0;
error:
scm_destroy(p);
return err;
}
int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
{
struct cmsghdr __user *cm = (struct cmsghdr __user *)msg->msg_control;
struct cmsghdr cmhdr;
int cmlen = CMSG_LEN(len);
int err;
if (MSG_CMSG_COMPAT & msg->msg_flags)
return put_cmsg_compat(msg, level, type, len, data);
if (cm==NULL || msg->msg_controllen < sizeof(*cm)) {
msg->msg_flags |= MSG_CTRUNC;
return 0; /* XXX: return error? check spec. */
}
if (msg->msg_controllen < cmlen) {
msg->msg_flags |= MSG_CTRUNC;
cmlen = msg->msg_controllen;
}
cmhdr.cmsg_level = level;
cmhdr.cmsg_type = type;
cmhdr.cmsg_len = cmlen;
err = -EFAULT;
if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
goto out;
if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
goto out;
cmlen = CMSG_SPACE(len);
msg->msg_control += cmlen;
msg->msg_controllen -= cmlen;
err = 0;
out:
return err;
}
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
{
struct cmsghdr __user *cm = (struct cmsghdr __user*)msg->msg_control;
int fdmax = 0;
int fdnum = scm->fp->count;
struct file **fp = scm->fp->fp;
int __user *cmfptr;
int err = 0, i;
if (MSG_CMSG_COMPAT & msg->msg_flags) {
scm_detach_fds_compat(msg, scm);
return;
}
if (msg->msg_controllen > sizeof(struct cmsghdr))
fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr))
/ sizeof(int));
if (fdnum < fdmax)
fdmax = fdnum;
for (i=0, cmfptr=(int __user *)CMSG_DATA(cm); i<fdmax; i++, cmfptr++)
{
int new_fd;
err = security_file_receive(fp[i]);
if (err)
break;
err = get_unused_fd();
if (err < 0)
break;
new_fd = err;
err = put_user(new_fd, cmfptr);
if (err) {
put_unused_fd(new_fd);
break;
}
/* Bump the usage count and install the file. */
get_file(fp[i]);
fd_install(new_fd, fp[i]);
}
if (i > 0)
{
int cmlen = CMSG_LEN(i*sizeof(int));
err = put_user(SOL_SOCKET, &cm->cmsg_level);
if (!err)
err = put_user(SCM_RIGHTS, &cm->cmsg_type);
if (!err)
err = put_user(cmlen, &cm->cmsg_len);
if (!err) {
cmlen = CMSG_SPACE(i*sizeof(int));
msg->msg_control += cmlen;
msg->msg_controllen -= cmlen;
}
}
if (i < fdnum || (fdnum && fdmax <= 0))
msg->msg_flags |= MSG_CTRUNC;
/*
* All of the files that fit in the message have had their
* usage counts incremented, so we just free the list.
*/
__scm_destroy(scm);
}
struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
{
struct scm_fp_list *new_fpl;
int i;
if (!fpl)
return NULL;
new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
if (new_fpl) {
for (i=fpl->count-1; i>=0; i--)
get_file(fpl->fp[i]);
memcpy(new_fpl, fpl, sizeof(*fpl));
}
return new_fpl;
}
EXPORT_SYMBOL(__scm_destroy);
EXPORT_SYMBOL(__scm_send);
EXPORT_SYMBOL(put_cmsg);
EXPORT_SYMBOL(scm_detach_fds);
EXPORT_SYMBOL(scm_fp_dup);

2041
net/core/skbuff.c Normal file

File diff suppressed because it is too large Load Diff

1976
net/core/sock.c Normal file

File diff suppressed because it is too large Load Diff

290
net/core/stream.c Normal file
View File

@@ -0,0 +1,290 @@
/*
* SUCS NET3:
*
* Generic stream handling routines. These are generic for most
* protocols. Even IP. Tonight 8-).
* This is used because TCP, LLC (others too) layer all have mostly
* identical sendmsg() and recvmsg() code.
* So we (will) share it here.
*
* Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
* (from old tcp.c code)
* Alan Cox <alan@redhat.com> (Borrowed comments 8-))
*/
#include <linux/module.h>
#include <linux/net.h>
#include <linux/signal.h>
#include <linux/tcp.h>
#include <linux/wait.h>
#include <net/sock.h>
/**
* sk_stream_write_space - stream socket write_space callback.
* @sk: socket
*
* FIXME: write proper description
*/
void sk_stream_write_space(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
clear_bit(SOCK_NOSPACE, &sock->flags);
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
wake_up_interruptible(sk->sk_sleep);
if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
sock_wake_async(sock, 2, POLL_OUT);
}
}
EXPORT_SYMBOL(sk_stream_write_space);
/**
* sk_stream_wait_connect - Wait for a socket to get into the connected state
* @sk: sock to wait on
* @timeo_p: for how long to wait
*
* Must be called with the socket locked.
*/
int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
{
struct task_struct *tsk = current;
DEFINE_WAIT(wait);
int done;
do {
int err = sock_error(sk);
if (err)
return err;
if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
return -EPIPE;
if (!*timeo_p)
return -EAGAIN;
if (signal_pending(tsk))
return sock_intr_errno(*timeo_p);
prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
sk->sk_write_pending++;
done = sk_wait_event(sk, timeo_p,
!sk->sk_err &&
!((1 << sk->sk_state) &
~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
finish_wait(sk->sk_sleep, &wait);
sk->sk_write_pending--;
} while (!done);
return 0;
}
EXPORT_SYMBOL(sk_stream_wait_connect);
/**
* sk_stream_closing - Return 1 if we still have things to send in our buffers.
* @sk: socket to verify
*/
static inline int sk_stream_closing(struct sock *sk)
{
return (1 << sk->sk_state) &
(TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
}
void sk_stream_wait_close(struct sock *sk, long timeout)
{
if (timeout) {
DEFINE_WAIT(wait);
do {
prepare_to_wait(sk->sk_sleep, &wait,
TASK_INTERRUPTIBLE);
if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
break;
} while (!signal_pending(current) && timeout);
finish_wait(sk->sk_sleep, &wait);
}
}
EXPORT_SYMBOL(sk_stream_wait_close);
/**
* sk_stream_wait_memory - Wait for more memory for a socket
* @sk: socket to wait for memory
* @timeo_p: for how long
*/
int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
{
int err = 0;
long vm_wait = 0;
long current_timeo = *timeo_p;
DEFINE_WAIT(wait);
if (sk_stream_memory_free(sk))
current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;
while (1) {
set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
if (!*timeo_p)
goto do_nonblock;
if (signal_pending(current))
goto do_interrupted;
clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
if (sk_stream_memory_free(sk) && !vm_wait)
break;
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
sk->sk_write_pending++;
sk_wait_event(sk, &current_timeo, !sk->sk_err &&
!(sk->sk_shutdown & SEND_SHUTDOWN) &&
sk_stream_memory_free(sk) &&
vm_wait);
sk->sk_write_pending--;
if (vm_wait) {
vm_wait -= current_timeo;
current_timeo = *timeo_p;
if (current_timeo != MAX_SCHEDULE_TIMEOUT &&
(current_timeo -= vm_wait) < 0)
current_timeo = 0;
vm_wait = 0;
}
*timeo_p = current_timeo;
}
out:
finish_wait(sk->sk_sleep, &wait);
return err;
do_error:
err = -EPIPE;
goto out;
do_nonblock:
err = -EAGAIN;
goto out;
do_interrupted:
err = sock_intr_errno(*timeo_p);
goto out;
}
EXPORT_SYMBOL(sk_stream_wait_memory);
void sk_stream_rfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
skb_truesize_check(skb);
atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
sk->sk_forward_alloc += skb->truesize;
}
EXPORT_SYMBOL(sk_stream_rfree);
int sk_stream_error(struct sock *sk, int flags, int err)
{
if (err == -EPIPE)
err = sock_error(sk) ? : -EPIPE;
if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
send_sig(SIGPIPE, current, 0);
return err;
}
EXPORT_SYMBOL(sk_stream_error);
void __sk_stream_mem_reclaim(struct sock *sk)
{
atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
sk->sk_prot->memory_allocated);
sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
if (*sk->sk_prot->memory_pressure &&
(atomic_read(sk->sk_prot->memory_allocated) <
sk->sk_prot->sysctl_mem[0]))
*sk->sk_prot->memory_pressure = 0;
}
EXPORT_SYMBOL(__sk_stream_mem_reclaim);
int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
{
int amt = sk_stream_pages(size);
sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
atomic_add(amt, sk->sk_prot->memory_allocated);
/* Under limit. */
if (atomic_read(sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
if (*sk->sk_prot->memory_pressure)
*sk->sk_prot->memory_pressure = 0;
return 1;
}
/* Over hard limit. */
if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
sk->sk_prot->enter_memory_pressure();
goto suppress_allocation;
}
/* Under pressure. */
if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
sk->sk_prot->enter_memory_pressure();
if (kind) {
if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
return 1;
} else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
return 1;
if (!*sk->sk_prot->memory_pressure ||
sk->sk_prot->sysctl_mem[2] > atomic_read(sk->sk_prot->sockets_allocated) *
sk_stream_pages(sk->sk_wmem_queued +
atomic_read(&sk->sk_rmem_alloc) +
sk->sk_forward_alloc))
return 1;
suppress_allocation:
if (!kind) {
sk_stream_moderate_sndbuf(sk);
/* Fail only if socket is _under_ its sndbuf.
* In this case we cannot block, so that we have to fail.
*/
if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
return 1;
}
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
atomic_sub(amt, sk->sk_prot->memory_allocated);
return 0;
}
EXPORT_SYMBOL(sk_stream_mem_schedule);
void sk_stream_kill_queues(struct sock *sk)
{
/* First the read buffer. */
__skb_queue_purge(&sk->sk_receive_queue);
/* Next, the error queue. */
__skb_queue_purge(&sk->sk_error_queue);
/* Next, the write queue. */
BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
/* Account for returned memory. */
sk_stream_mem_reclaim(sk);
BUG_TRAP(!sk->sk_wmem_queued);
BUG_TRAP(!sk->sk_forward_alloc);
/* It is _impossible_ for the backlog to contain anything
* when we get here. All user references to this socket
* have gone away, only the net layer knows can touch it.
*/
}
EXPORT_SYMBOL(sk_stream_kill_queues);

142
net/core/sysctl_net_core.c Normal file
View File

@@ -0,0 +1,142 @@
/* -*- linux-c -*-
* sysctl_net_core.c: sysctl interface to net core subsystem.
*
* Begun April 1, 1996, Mike Shaver.
* Added /proc/sys/net/core directory entry (empty =) ). [MS]
*/
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <net/sock.h>
#ifdef CONFIG_SYSCTL
extern int netdev_max_backlog;
extern int weight_p;
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
extern int sysctl_core_destroy_delay;
#ifdef CONFIG_XFRM
extern u32 sysctl_xfrm_aevent_etime;
extern u32 sysctl_xfrm_aevent_rseqth;
#endif
ctl_table core_table[] = {
#ifdef CONFIG_NET
{
.ctl_name = NET_CORE_WMEM_MAX,
.procname = "wmem_max",
.data = &sysctl_wmem_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_RMEM_MAX,
.procname = "rmem_max",
.data = &sysctl_rmem_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_WMEM_DEFAULT,
.procname = "wmem_default",
.data = &sysctl_wmem_default,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_RMEM_DEFAULT,
.procname = "rmem_default",
.data = &sysctl_rmem_default,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_DEV_WEIGHT,
.procname = "dev_weight",
.data = &weight_p,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_MAX_BACKLOG,
.procname = "netdev_max_backlog",
.data = &netdev_max_backlog,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_MSG_COST,
.procname = "message_cost",
.data = &net_msg_cost,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
.strategy = &sysctl_jiffies,
},
{
.ctl_name = NET_CORE_MSG_BURST,
.procname = "message_burst",
.data = &net_msg_burst,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = NET_CORE_OPTMEM_MAX,
.procname = "optmem_max",
.data = &sysctl_optmem_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
#ifdef CONFIG_XFRM
{
.ctl_name = NET_CORE_AEVENT_ETIME,
.procname = "xfrm_aevent_etime",
.data = &sysctl_xfrm_aevent_etime,
.maxlen = sizeof(u32),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_AEVENT_RSEQTH,
.procname = "xfrm_aevent_rseqth",
.data = &sysctl_xfrm_aevent_rseqth,
.maxlen = sizeof(u32),
.mode = 0644,
.proc_handler = &proc_dointvec
},
#endif /* CONFIG_XFRM */
#endif /* CONFIG_NET */
{
.ctl_name = NET_CORE_SOMAXCONN,
.procname = "somaxconn",
.data = &sysctl_somaxconn,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_CORE_BUDGET,
.procname = "netdev_budget",
.data = &netdev_budget,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
},
{ .ctl_name = 0 }
};
#endif

132
net/core/user_dma.c Normal file
View File

@@ -0,0 +1,132 @@
/*
* Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
* Portions based on net/core/datagram.c and copyrighted by their authors.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called COPYING.
*/
/*
* This code allows the net stack to make use of a DMA engine for
* skb to iovec copies.
*/
#include <linux/dmaengine.h>
#include <linux/socket.h>
#include <linux/rtnetlink.h> /* for BUG_TRAP */
#include <net/tcp.h>
#include <net/netdma.h>
#define NET_DMA_DEFAULT_COPYBREAK 4096
int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
/**
* dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.
* @skb - buffer to copy
* @offset - offset in the buffer to start copying from
* @iovec - io vector to copy to
* @len - amount of data to copy from buffer to iovec
* @pinned_list - locked iovec buffer data
*
* Note: the iovec is modified during the copy.
*/
int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
struct sk_buff *skb, int offset, struct iovec *to,
size_t len, struct dma_pinned_list *pinned_list)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
dma_cookie_t cookie = 0;
/* Copy header. */
if (copy > 0) {
if (copy > len)
copy = len;
cookie = dma_memcpy_to_iovec(chan, to, pinned_list,
skb->data + offset, copy);
if (cookie < 0)
goto fault;
len -= copy;
if (len == 0)
goto end;
offset += copy;
}
/* Copy paged appendix. Hmm... why does this look so complicated? */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
BUG_TRAP(start <= offset + len);
end = start + skb_shinfo(skb)->frags[i].size;
copy = end - offset;
if ((copy = end - offset) > 0) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
struct page *page = frag->page;
if (copy > len)
copy = len;
cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page,
frag->page_offset + offset - start, copy);
if (cookie < 0)
goto fault;
len -= copy;
if (len == 0)
goto end;
offset += copy;
}
start = end;
}
if (skb_shinfo(skb)->frag_list) {
struct sk_buff *list = skb_shinfo(skb)->frag_list;
for (; list; list = list->next) {
int end;
BUG_TRAP(start <= offset + len);
end = start + list->len;
copy = end - offset;
if (copy > 0) {
if (copy > len)
copy = len;
cookie = dma_skb_copy_datagram_iovec(chan, list,
offset - start, to, copy,
pinned_list);
if (cookie < 0)
goto fault;
len -= copy;
if (len == 0)
goto end;
offset += copy;
}
start = end;
}
}
end:
if (!len) {
skb->dma_cookie = cookie;
return cookie;
}
fault:
return -EFAULT;
}

292
net/core/utils.c Normal file
View File

@@ -0,0 +1,292 @@
/*
* Generic address resultion entity
*
* Authors:
* net_random Alan Cox
* net_ratelimit Andi Kleen
* in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project
*
* Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/inet.h>
#include <linux/mm.h>
#include <linux/net.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <asm/byteorder.h>
#include <asm/system.h>
#include <asm/uaccess.h>
int net_msg_cost = 5*HZ;
int net_msg_burst = 10;
/*
* All net warning printk()s should be guarded by this function.
*/
int net_ratelimit(void)
{
return __printk_ratelimit(net_msg_cost, net_msg_burst);
}
EXPORT_SYMBOL(net_ratelimit);
/*
* Convert an ASCII string to binary IP.
* This is outside of net/ipv4/ because various code that uses IP addresses
* is otherwise not dependent on the TCP/IP stack.
*/
__be32 in_aton(const char *str)
{
unsigned long l;
unsigned int val;
int i;
l = 0;
for (i = 0; i < 4; i++)
{
l <<= 8;
if (*str != '\0')
{
val = 0;
while (*str != '\0' && *str != '.' && *str != '\n')
{
val *= 10;
val += *str - '0';
str++;
}
l |= val;
if (*str != '\0')
str++;
}
}
return(htonl(l));
}
EXPORT_SYMBOL(in_aton);
#define IN6PTON_XDIGIT 0x00010000
#define IN6PTON_DIGIT 0x00020000
#define IN6PTON_COLON_MASK 0x00700000
#define IN6PTON_COLON_1 0x00100000 /* single : requested */
#define IN6PTON_COLON_2 0x00200000 /* second : requested */
#define IN6PTON_COLON_1_2 0x00400000 /* :: requested */
#define IN6PTON_DOT 0x00800000 /* . */
#define IN6PTON_DELIM 0x10000000
#define IN6PTON_NULL 0x20000000 /* first/tail */
#define IN6PTON_UNKNOWN 0x40000000
static inline int digit2bin(char c, int delim)
{
if (c == delim || c == '\0')
return IN6PTON_DELIM;
if (c == '.')
return IN6PTON_DOT;
if (c >= '0' && c <= '9')
return (IN6PTON_DIGIT | (c - '0'));
return IN6PTON_UNKNOWN;
}
static inline int xdigit2bin(char c, int delim)
{
if (c == delim || c == '\0')
return IN6PTON_DELIM;
if (c == ':')
return IN6PTON_COLON_MASK;
if (c == '.')
return IN6PTON_DOT;
if (c >= '0' && c <= '9')
return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0'));
if (c >= 'a' && c <= 'f')
return (IN6PTON_XDIGIT | (c - 'a' + 10));
if (c >= 'A' && c <= 'F')
return (IN6PTON_XDIGIT | (c - 'A' + 10));
if (delim == -1)
return IN6PTON_DELIM;
return IN6PTON_UNKNOWN;
}
int in4_pton(const char *src, int srclen,
u8 *dst,
int delim, const char **end)
{
const char *s;
u8 *d;
u8 dbuf[4];
int ret = 0;
int i;
int w = 0;
if (srclen < 0)
srclen = strlen(src);
s = src;
d = dbuf;
i = 0;
while(1) {
int c;
c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK))) {
goto out;
}
if (c & (IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
if (w == 0)
goto out;
*d++ = w & 0xff;
w = 0;
i++;
if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
if (i != 4)
goto out;
break;
}
goto cont;
}
w = (w * 10) + c;
if ((w & 0xffff) > 255) {
goto out;
}
cont:
if (i >= 4)
goto out;
s++;
srclen--;
}
ret = 1;
memcpy(dst, dbuf, sizeof(dbuf));
out:
if (end)
*end = s;
return ret;
}
EXPORT_SYMBOL(in4_pton);
int in6_pton(const char *src, int srclen,
u8 *dst,
int delim, const char **end)
{
const char *s, *tok = NULL;
u8 *d, *dc = NULL;
u8 dbuf[16];
int ret = 0;
int i;
int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL;
int w = 0;
memset(dbuf, 0, sizeof(dbuf));
s = src;
d = dbuf;
if (srclen < 0)
srclen = strlen(src);
while (1) {
int c;
c = xdigit2bin(srclen > 0 ? *s : '\0', delim);
if (!(c & state))
goto out;
if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
/* process one 16-bit word */
if (!(state & IN6PTON_NULL)) {
*d++ = (w >> 8) & 0xff;
*d++ = w & 0xff;
}
w = 0;
if (c & IN6PTON_DELIM) {
/* We've processed last word */
break;
}
/*
* COLON_1 => XDIGIT
* COLON_2 => XDIGIT|DELIM
* COLON_1_2 => COLON_2
*/
switch (state & IN6PTON_COLON_MASK) {
case IN6PTON_COLON_2:
dc = d;
state = IN6PTON_XDIGIT | IN6PTON_DELIM;
if (dc - dbuf >= sizeof(dbuf))
state |= IN6PTON_NULL;
break;
case IN6PTON_COLON_1|IN6PTON_COLON_1_2:
state = IN6PTON_XDIGIT | IN6PTON_COLON_2;
break;
case IN6PTON_COLON_1:
state = IN6PTON_XDIGIT;
break;
case IN6PTON_COLON_1_2:
state = IN6PTON_COLON_2;
break;
default:
state = 0;
}
tok = s + 1;
goto cont;
}
if (c & IN6PTON_DOT) {
ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s);
if (ret > 0) {
d += 4;
break;
}
goto out;
}
w = (w << 4) | (0xff & c);
state = IN6PTON_COLON_1 | IN6PTON_DELIM;
if (!(w & 0xf000)) {
state |= IN6PTON_XDIGIT;
}
if (!dc && d + 2 < dbuf + sizeof(dbuf)) {
state |= IN6PTON_COLON_1_2;
state &= ~IN6PTON_DELIM;
}
if (d + 2 >= dbuf + sizeof(dbuf)) {
state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2);
}
cont:
if ((dc && d + 4 < dbuf + sizeof(dbuf)) ||
d + 4 == dbuf + sizeof(dbuf)) {
state |= IN6PTON_DOT;
}
if (d >= dbuf + sizeof(dbuf)) {
state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK);
}
s++;
srclen--;
}
i = 15; d--;
if (dc) {
while(d >= dc)
dst[i--] = *d--;
while(i >= dc - dbuf)
dst[i--] = 0;
while(i >= 0)
dst[i--] = *d--;
} else
memcpy(dst, dbuf, sizeof(dbuf));
ret = 1;
out:
if (end)
*end = s;
return ret;
}
EXPORT_SYMBOL(in6_pton);

2371
net/core/wireless.c Normal file

File diff suppressed because it is too large Load Diff