Creation of Cybook 2416 (actually Gen4) repository

This commit is contained in:
mlt
2009-12-18 17:10:00 +00:00
committed by godzil
commit 76f20f4d40
13791 changed files with 6812321 additions and 0 deletions

64
net/dccp/Kconfig Normal file
View File

@@ -0,0 +1,64 @@
menu "DCCP Configuration (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
config IP_DCCP
tristate "The DCCP Protocol (EXPERIMENTAL)"
---help---
Datagram Congestion Control Protocol (RFC 4340)
From http://www.ietf.org/rfc/rfc4340.txt:
The Datagram Congestion Control Protocol (DCCP) is a transport
protocol that implements bidirectional, unicast connections of
congestion-controlled, unreliable datagrams. It should be suitable
for use by applications such as streaming media, Internet telephony,
and on-line games.
To compile this protocol support as a module, choose M here: the
module will be called dccp.
If in doubt, say N.
config INET_DCCP_DIAG
depends on IP_DCCP && INET_DIAG
def_tristate y if (IP_DCCP = y && INET_DIAG = y)
def_tristate m
config IP_DCCP_ACKVEC
depends on IP_DCCP
bool
source "net/dccp/ccids/Kconfig"
menu "DCCP Kernel Hacking"
depends on IP_DCCP && DEBUG_KERNEL=y
config IP_DCCP_DEBUG
bool "DCCP debug messages"
---help---
Only use this if you're hacking DCCP.
When compiling DCCP as a module, this debugging output can be toggled
by setting the parameter dccp_debug of the `dccp' module to 0 or 1.
Just say N.
config NET_DCCPPROBE
tristate "DCCP connection probing"
depends on PROC_FS && KPROBES
---help---
This module allows for capturing the changes to DCCP connection
state in response to incoming packets. It is used for debugging
DCCP congestion avoidance modules. If you don't understand
what was just said, you don't need it: say N.
Documentation on how to use DCCP connection probing can be found
at http://linux-net.osdl.org/index.php/DccpProbe
To compile this code as a module, choose M here: the
module will be called dccp_probe.
endmenu
endmenu

21
net/dccp/Makefile Normal file
View File

@@ -0,0 +1,21 @@
# Core objects: dccp.o and dccp_ipv4.o are built in or modular according to
# the value of CONFIG_IP_DCCP.
obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
# Object files that always make up dccp.o.
dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
dccp_ipv4-y := ipv4.o
# build dccp_ipv6 as module whenever either IPv6 or DCCP is a module
# (subst replaces a "y" in CONFIG_IPV6 by the value of CONFIG_IP_DCCP; an
# "m" or empty CONFIG_IPV6 is passed through unchanged)
obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o
dccp_ipv6-y := ipv6.o
# Ack Vector support is linked into dccp.o only when it is configured.
dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
# Separate objects for the diag and probe facilities.
obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o
# sysctl.o is linked into dccp.o only when CONFIG_SYSCTL is set.
dccp-$(CONFIG_SYSCTL) += sysctl.o
dccp_diag-y := diag.o
dccp_probe-y := probe.o
# Always descend into the CCID implementations.
obj-y += ccids/

515
net/dccp/ackvec.c Normal file
View File

@@ -0,0 +1,515 @@
/*
* net/dccp/ackvec.c
*
* An implementation of the DCCP protocol
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; version 2 of the License;
*/
#include "ackvec.h"
#include "dccp.h"
#include <linux/dccp.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/sock.h>
static struct kmem_cache *dccp_ackvec_slab;
static struct kmem_cache *dccp_ackvec_record_slab;
/*
 * Allocate one Ack Vector record from dccp_ackvec_record_slab (GFP_ATOMIC)
 * and initialise its list node.
 *
 * Returns the record, or NULL if the allocation failed.
 */
static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
{
	struct dccp_ackvec_record *rec;

	rec = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
	if (rec == NULL)
		return NULL;

	INIT_LIST_HEAD(&rec->dccpavr_node);
	return rec;
}
/*
 * Return an Ack Vector record to dccp_ackvec_record_slab.
 *
 * A NULL @avr is ignored.  The record is expected to be unlinked already;
 * a warning is raised if its list node is still in use.
 */
static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
{
	if (unlikely(!avr))
		return;

	/* Warn when the record being freed is still linked into a list */
	WARN_ON(!list_empty(&avr->dccpavr_node));

	kmem_cache_free(dccp_ackvec_record_slab, avr);
}
static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
struct dccp_ackvec_record *avr)
{
/*
* AVRs are sorted by seqno. Since we are sending them in order, we
* just add the AVR at the head of the list.
* -sorbo.
*/
if (!list_empty(&av->dccpav_records)) {
const struct dccp_ackvec_record *head =
list_entry(av->dccpav_records.next,
struct dccp_ackvec_record,
dccpavr_node);
BUG_ON(before48(avr->dccpavr_ack_seqno,
head->dccpavr_ack_seqno));
}
list_add(&avr->dccpavr_node, &av->dccpav_records);
}
/*
 * Write the pending Ack Vector of @sk into the option area of @skb and
 * remember what was sent in a new dccp_ackvec_record.
 *
 * The vector (dccpav_vec_len bytes, starting at dccpav_buf_head) is split
 * into as many DCCPO_ACK_VECTOR_0 options as needed; each option carries at
 * most DCCP_MAX_ACKVEC_OPT_LEN vector bytes plus a two-byte type/length
 * header.
 *
 * Returns 0 on success and -1 if the options would exceed DCCP_MAX_OPT_LEN,
 * if the Elapsed Time option could not be inserted, or if no record could
 * be allocated.
 */
int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
/* Figure out how many options do we need to represent the ackvec */
const u16 nr_opts = (av->dccpav_vec_len +
DCCP_MAX_ACKVEC_OPT_LEN - 1) /
DCCP_MAX_ACKVEC_OPT_LEN;
/* Total option bytes: the vector itself plus 2 header bytes per option */
u16 len = av->dccpav_vec_len + 2 * nr_opts, i;
struct timeval now;
u32 elapsed_time;
const unsigned char *tail, *from;
unsigned char *to;
struct dccp_ackvec_record *avr;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
return -1;
/*
 * Time passed since dccpav_time was last set, divided by 10
 * (presumably microseconds -> units of 10 usec; confirm against
 * timeval_delta()).  An Elapsed Time option is only added when non-zero.
 */
dccp_timestamp(sk, &now);
elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10;
if (elapsed_time != 0 &&
dccp_insert_option_elapsed_time(sk, skb, elapsed_time))
return -1;
avr = dccp_ackvec_record_new();
if (avr == NULL)
return -1;
/* Reserve the option bytes in front of the current skb data */
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
to = skb_push(skb, len);
/* From here on len counts the vector bytes that remain to be copied */
len = av->dccpav_vec_len;
from = av->dccpav_buf + av->dccpav_buf_head;
tail = av->dccpav_buf + DCCP_MAX_ACKVEC_LEN;
for (i = 0; i < nr_opts; ++i) {
int copylen = len;
if (len > DCCP_MAX_ACKVEC_OPT_LEN)
copylen = DCCP_MAX_ACKVEC_OPT_LEN;
/* Option header: type, then length including the two header bytes */
*to++ = DCCPO_ACK_VECTOR_0;
*to++ = copylen + 2;
/* Check if buf_head wraps */
if (from + copylen > tail) {
/* Copy up to the end of dccpav_buf, then continue from its start */
const u16 tailsize = tail - from;
memcpy(to, from, tailsize);
to += tailsize;
len -= tailsize;
copylen -= tailsize;
from = av->dccpav_buf;
}
memcpy(to, from, copylen);
from += copylen;
to += copylen;
len -= copylen;
}
/*
* From RFC 4340, A.2:
*
* For each acknowledgement it sends, the HC-Receiver will add an
* acknowledgement record. ack_seqno will equal the HC-Receiver
* sequence number it used for the ack packet; ack_ptr will equal
* buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
* equal buf_nonce.
*/
avr->dccpavr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
avr->dccpavr_ack_ptr = av->dccpav_buf_head;
avr->dccpavr_ack_ackno = av->dccpav_buf_ackno;
avr->dccpavr_ack_nonce = av->dccpav_buf_nonce;
avr->dccpavr_sent_len = av->dccpav_vec_len;
dccp_ackvec_insert_avr(av, avr);
dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
"ack_ackno=%llu\n",
dccp_role(sk), avr->dccpavr_sent_len,
(unsigned long long)avr->dccpavr_ack_seqno,
(unsigned long long)avr->dccpavr_ack_ackno);
return 0;
}
/*
 * Allocate an Ack Vector from dccp_ackvec_slab and put it into the empty
 * state (no vector bytes in use, no records).
 *
 * @priority: allocation flags handed to kmem_cache_alloc()
 *
 * Returns the new Ack Vector, or NULL if the allocation failed.
 */
struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
{
	struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);

	if (av != NULL) {
		av->dccpav_buf_head	= DCCP_MAX_ACKVEC_LEN - 1;
		av->dccpav_buf_ackno	= DCCP_MAX_SEQNO + 1;
		/*
		 * The slab has no constructor and the object is not zeroed,
		 * so both nonce fields have to be cleared here.  (The
		 * previous code assigned dccpav_buf_nonce twice and left
		 * dccpav_ack_nonce uninitialised.)
		 */
		av->dccpav_ack_nonce	= 0;
		av->dccpav_buf_nonce	= 0;
		av->dccpav_time.tv_sec	= 0;
		av->dccpav_time.tv_usec	= 0;
		av->dccpav_vec_len	= 0;
		INIT_LIST_HEAD(&av->dccpav_records);
	}

	return av;
}
/*
 * Release an Ack Vector together with every record still on its list.
 * A NULL @av is ignored.
 */
void dccp_ackvec_free(struct dccp_ackvec *av)
{
	struct dccp_ackvec_record *cur, *tmp;

	if (unlikely(av == NULL))
		return;

	/* Iterating an empty list is a no-op, so no separate emptiness test */
	list_for_each_entry_safe(cur, tmp, &av->dccpav_records, dccpavr_node) {
		list_del_init(&cur->dccpavr_node);
		dccp_ackvec_record_delete(cur);
	}

	kmem_cache_free(dccp_ackvec_slab, av);
}
/* State bits (DCCP_ACKVEC_STATE_MASK) of the vector byte at @index. */
static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
				   const u32 index)
{
	const u8 cell = av->dccpav_buf[index];

	return cell & DCCP_ACKVEC_STATE_MASK;
}
/* Run length bits (DCCP_ACKVEC_LEN_MASK) of the vector byte at @index. */
static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
				 const u32 index)
{
	const u8 cell = av->dccpav_buf[index];

	return cell & DCCP_ACKVEC_LEN_MASK;
}
/*
 * If several packets are missing, the HC-Receiver may prefer to enter multiple
 * bytes with run length 0, rather than a single byte with a larger run length;
 * this simplifies table updates if one of the missing packets arrives.
 *
 * Move the buffer head back by @packets cells (wrapping around the start of
 * dccpav_buf), store @state in the new head cell and mark the packets - 1
 * cells between the new and the old head as DCCP_ACKVEC_STATE_NOT_RECEIVED.
 *
 * Returns 0 on success, -ENOBUFS if the vector would grow beyond
 * DCCP_MAX_ACKVEC_LEN.
 */
static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
						 const unsigned int packets,
						 const unsigned char state)
{
	unsigned int gap;
	long new_head;

	if (av->dccpav_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
		return -ENOBUFS;

	gap	 = packets - 1;
	new_head = av->dccpav_buf_head - packets;

	if (new_head < 0) {
		if (gap > 0) {
			/*
			 * Wrapped: first fill the cells [0, old head), i.e.
			 * gap + new_head + 1 == old head bytes.
			 */
			memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
			       gap + new_head + 1);
			/*
			 * What is left of the gap sits between the wrapped
			 * head and the end of the buffer.  The head itself
			 * takes one of the -new_head cells, so only
			 * -new_head - 1 remain; using -new_head here would
			 * write one byte past the end of dccpav_buf.
			 */
			gap = -new_head - 1;
		}
		new_head += DCCP_MAX_ACKVEC_LEN;
	}

	av->dccpav_buf_head = new_head;

	if (gap > 0)
		memset(av->dccpav_buf + av->dccpav_buf_head + 1,
		       DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);

	av->dccpav_buf[av->dccpav_buf_head] = state;
	av->dccpav_vec_len += packets;
	return 0;
}
/*
 * Implements the RFC 4340, Appendix A
 *
 * Record @state for the packet with sequence number @ackno in @av.
 *
 * Returns 0 on success, -ENOBUFS if the circular buffer cannot take the new
 * entry, and -EILSEQ if @ackno is already covered by an entry in the buffer.
 */
int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
		    const u64 ackno, const u8 state)
{
	/*
	 * Check at the right places if the buffer is full, if it is, tell the
	 * caller to start dropping packets till the HC-Sender acks our ACK
	 * vectors, when we will free up space in dccpav_buf.
	 *
	 * We may well decide to do buffer compression, etc, but for now lets
	 * just drop.
	 *
	 * From Appendix A.1.1 (`New Packets'):
	 *
	 *	Of course, the circular buffer may overflow, either when the
	 *	HC-Sender is sending data at a very high rate, when the
	 *	HC-Receiver's acknowledgements are not reaching the HC-Sender,
	 *	or when the HC-Sender is forgetting to acknowledge those acks
	 *	(so the HC-Receiver is unable to clean up old state). In this
	 *	case, the HC-Receiver should either compress the buffer (by
	 *	increasing run lengths when possible), transfer its state to
	 *	a larger buffer, or, as a last resort, drop all received
	 *	packets, without processing them whatsoever, until its buffer
	 *	shrinks again.
	 */

	/* See if this is the first ackno being inserted */
	if (av->dccpav_vec_len == 0) {
		av->dccpav_buf[av->dccpav_buf_head] = state;
		av->dccpav_vec_len = 1;
	} else if (after48(ackno, av->dccpav_buf_ackno)) {
		const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno,
						   ackno);

		/*
		 * Look if the state of this packet is the same as the
		 * previous ackno and if so if we can bump the head len.
		 */
		if (delta == 1 &&
		    dccp_ackvec_state(av, av->dccpav_buf_head) == state &&
		    (dccp_ackvec_len(av, av->dccpav_buf_head) <
		     DCCP_ACKVEC_LEN_MASK))
			av->dccpav_buf[av->dccpav_buf_head]++;
		else if (dccp_ackvec_set_buf_head_state(av, delta, state))
			return -ENOBUFS;
	} else {
		/*
		 * A.1.2.  Old Packets
		 *
		 *	When a packet with Sequence Number S <= buf_ackno
		 *	arrives, the HC-Receiver will scan the table for
		 *	the byte corresponding to S.  (Indexing structures
		 *	could reduce the complexity of this scan.)
		 */
		u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno);
		u32 index = av->dccpav_buf_head;

		/*
		 * NOTE(review): the scan only ends through one of the two
		 * gotos below; it is not bounded by dccpav_vec_len.
		 */
		while (1) {
			const u8 len = dccp_ackvec_len(av, index);
			/*
			 * Must not be called "state": that would shadow the
			 * parameter and the reserved seat below would be
			 * rewritten with its old value instead of the state
			 * of the packet that just arrived.
			 */
			const u8 cell_state = dccp_ackvec_state(av, index);

			/*
			 * valid packets not yet in dccpav_buf have a reserved
			 * entry, with a len equal to 0.
			 */
			if (cell_state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
			    len == 0 && delta == 0) { /* Found our
							 reserved seat! */
				dccp_pr_debug("Found %llu reserved seat!\n",
					      (unsigned long long)ackno);
				av->dccpav_buf[index] = state;
				goto out;
			}
			/* len == 0 means one packet */
			if (delta < len + 1)
				goto out_duplicate;

			delta -= len + 1;
			if (++index == DCCP_MAX_ACKVEC_LEN)
				index = 0;
		}
	}

	av->dccpav_buf_ackno = ackno;
	dccp_timestamp(sk, &av->dccpav_time);
out:
	return 0;

out_duplicate:
	/* Duplicate packet */
	dccp_pr_debug("Received a dup or already considered lost "
		      "packet: %llu\n", (unsigned long long)ackno);
	return -EILSEQ;
}
#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Debug helper: print @len Ack Vector bytes starting at @vector as
 * "state,run length|" pairs, preceded by @len and @ackno.
 */
void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
{
	dccp_pr_debug_cat("ACK vector len=%d, ackno=%llu |", len,
			  (unsigned long long)ackno);

	for (; len--; ++vector) {
		/* The two state bits are shifted down to a value 0..3 */
		const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6;
		const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;

		dccp_pr_debug_cat("%d,%d|", state, rl);
	}

	dccp_pr_debug_cat("\n");
}
/*
 * Debug helper: print the bytes of @av currently in use, starting at the
 * buffer head.
 *
 * NOTE(review): dccpav_vec_len bytes are read linearly from dccpav_buf_head;
 * unlike dccp_insert_option_ackvec() there is no wrap-around to the start of
 * dccpav_buf, so this looks like it can read past the end of the buffer when
 * the vector wraps -- confirm.
 */
void dccp_ackvec_print(const struct dccp_ackvec *av)
{
dccp_ackvector_print(av->dccpav_buf_ackno,
av->dccpav_buf + av->dccpav_buf_head,
av->dccpav_vec_len);
}
#endif
static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
struct dccp_ackvec_record *avr)
{
struct dccp_ackvec_record *next;
/* sort out vector length */
if (av->dccpav_buf_head <= avr->dccpavr_ack_ptr)
av->dccpav_vec_len = avr->dccpavr_ack_ptr - av->dccpav_buf_head;
else
av->dccpav_vec_len = DCCP_MAX_ACKVEC_LEN - 1
- av->dccpav_buf_head
+ avr->dccpavr_ack_ptr;
/* free records */
list_for_each_entry_safe_from(avr, next, &av->dccpav_records,
dccpavr_node) {
list_del_init(&avr->dccpavr_node);
dccp_ackvec_record_delete(avr);
}
}
/*
 * Handle a received Acknowledgement Number: if @ackno acknowledges a packet
 * on which an Ack Vector was sent, that record and all records following it
 * on the list are discarded (see dccp_ackvec_throw_record()).
 */
void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
				 const u64 ackno)
{
	struct dccp_ackvec_record *avr;

	/*
	 * If we traverse backwards, it should be faster when we have large
	 * windows. We will be receiving ACKs for stuff we sent a while back
	 * -sorbo.
	 */
	list_for_each_entry_reverse(avr, &av->dccpav_records, dccpavr_node) {
		if (ackno == avr->dccpavr_ack_seqno) {
			dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
				      "ack_ackno=%llu, ACKED!\n",
				      dccp_role(sk), 1,
				      (unsigned long long)avr->dccpavr_ack_seqno,
				      (unsigned long long)avr->dccpavr_ack_ackno);
			dccp_ackvec_throw_record(av, avr);
			break;
		} else if (after48(avr->dccpavr_ack_seqno, ackno)) {
			/*
			 * Sequence numbers wrap, so they are compared with
			 * after48() like everywhere else in this file rather
			 * than with a plain '>'.
			 */
			break; /* old news */
		}
	}
}
/*
 * Walk a received Ack Vector (@len bytes at @vector, the first byte
 * describing the run that ends at *@ackno) and look for a run that covers
 * the sequence number of one of our own Ack Vector records.  If such a run
 * has a state other than DCCP_ACKVEC_STATE_NOT_RECEIVED, the record and all
 * records following it on the list are discarded and the scan stops.
 *
 * *@ackno is modified while scanning: after each vector byte it is moved
 * below the run that byte described.
 */
static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
struct sock *sk, u64 *ackno,
const unsigned char len,
const unsigned char *vector)
{
unsigned char i;
struct dccp_ackvec_record *avr;
/* Check if we actually sent an ACK vector */
if (list_empty(&av->dccpav_records))
return;
i = len;
/*
* XXX
* I think it might be more efficient to work backwards. See comment on
* rcv_ackno. -sorbo.
*/
avr = list_entry(av->dccpav_records.next, struct dccp_ackvec_record,
dccpavr_node);
while (i--) {
const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
u64 ackno_end_rl;
/* Lowest sequence number covered by the run of this vector byte */
dccp_set_seqno(&ackno_end_rl, *ackno - rl);
/*
* If our AVR sequence number is greater than the ack, go
* forward in the AVR list until it is not so.
*/
list_for_each_entry_from(avr, &av->dccpav_records,
dccpavr_node) {
if (!after48(avr->dccpavr_ack_seqno, *ackno))
goto found;
}
/* End of the dccpav_records list, not found, exit */
break;
found:
if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, *ackno)) {
const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
dccp_pr_debug("%s ACK vector 0, len=%d, "
"ack_seqno=%llu, ack_ackno=%llu, "
"ACKED!\n",
dccp_role(sk), len,
(unsigned long long)
avr->dccpavr_ack_seqno,
(unsigned long long)
avr->dccpavr_ack_ackno);
dccp_ackvec_throw_record(av, avr);
break;
}
/*
* If it wasn't received, continue scanning... we might
* find another one.
*/
}
/* Continue with the sequence number just below this run */
dccp_set_seqno(ackno, ackno_end_rl - 1);
++vector;
}
}
/*
 * Process one received Ack Vector option of @len bytes at @value against the
 * Ack Vector records of @sk.  @skb and @opt are not used here.
 *
 * Returns 0 on success, -1 if @len exceeds DCCP_MAX_ACKVEC_OPT_LEN.
 */
int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
		      u64 *ackno, const u8 opt, const u8 *value, const u8 len)
{
	struct dccp_ackvec *av;

	if (len > DCCP_MAX_ACKVEC_OPT_LEN)
		return -1;

	av = dccp_sk(sk)->dccps_hc_rx_ackvec;
	dccp_ackvec_check_rcv_ackvector(av, sk, ackno, len, value);
	return 0;
}
/*
 * Create the slab caches for Ack Vectors and Ack Vector records.
 *
 * Returns 0 on success.  If either cache cannot be created, whatever was
 * created is destroyed again, a critical message is logged and -ENOBUFS is
 * returned.
 */
int __init dccp_ackvec_init(void)
{
	dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
					     sizeof(struct dccp_ackvec), 0,
					     SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (dccp_ackvec_slab != NULL) {
		dccp_ackvec_record_slab =
			kmem_cache_create("dccp_ackvec_record",
					  sizeof(struct dccp_ackvec_record),
					  0, SLAB_HWCACHE_ALIGN, NULL, NULL);
		if (dccp_ackvec_record_slab != NULL)
			return 0;

		/* Second cache failed: undo the first one */
		kmem_cache_destroy(dccp_ackvec_slab);
		dccp_ackvec_slab = NULL;
	}

	DCCP_CRIT("Unable to create Ack Vector slab cache");
	return -ENOBUFS;
}
/*
 * Destroy whichever of the two Ack Vector slab caches exist and reset the
 * cache pointers to NULL.
 */
void dccp_ackvec_exit(void)
{
	struct kmem_cache *cache;

	cache = dccp_ackvec_slab;
	if (cache != NULL) {
		kmem_cache_destroy(cache);
		dccp_ackvec_slab = NULL;
	}

	cache = dccp_ackvec_record_slab;
	if (cache != NULL) {
		kmem_cache_destroy(cache);
		dccp_ackvec_record_slab = NULL;
	}
}

158
net/dccp/ackvec.h Normal file
View File

@@ -0,0 +1,158 @@
#ifndef _ACKVEC_H
#define _ACKVEC_H
/*
* net/dccp/ackvec.h
*
* An implementation of the DCCP protocol
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/time.h>
#include <linux/types.h>
/* Read about the ECN nonce to see why it is 253 */
#define DCCP_MAX_ACKVEC_OPT_LEN 253
/* We can spread an ack vector across multiple options */
#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2)
/*
 * Layout of one Ack Vector byte: the two most significant bits hold the
 * state, the six least significant bits hold the run length.  The state
 * constants below are already shifted into the state bit positions.
 */
#define DCCP_ACKVEC_STATE_RECEIVED 0
#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6)
#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6)
#define DCCP_ACKVEC_STATE_MASK 0xC0 /* 11000000 */
#define DCCP_ACKVEC_LEN_MASK 0x3F /* 00111111 */
/** struct dccp_ackvec - ack vector
*
* This data structure is the one defined in RFC 4340, Appendix A.
*
* @dccpav_buf_head - circular buffer head (index into %dccpav_buf)
* @dccpav_vec_len - number of bytes of %dccpav_buf currently in use,
* counted from %dccpav_buf_head (there is no separate
* tail index in this structure)
* @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the
* buffer (i.e. %dccpav_buf_head)
* @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
* by the buffer with State 0
*
* Additionally, the HC-Receiver must keep some information about the
* Ack Vectors it has recently sent. For each packet sent carrying an
* Ack Vector, it remembers four variables:
*
* @dccpav_records - list of dccp_ackvec_record
* @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
*
* @dccpav_time - timestamp (struct timeval) taken when %dccpav_buf_ackno
* was last updated
* @dccpav_buf - circular buffer of acknowledgeable packets
*/
struct dccp_ackvec {
u64 dccpav_buf_ackno;
struct list_head dccpav_records;
struct timeval dccpav_time;
u16 dccpav_buf_head;
u16 dccpav_vec_len;
u8 dccpav_buf_nonce;
u8 dccpav_ack_nonce;
u8 dccpav_buf[DCCP_MAX_ACKVEC_LEN];
};
/** struct dccp_ackvec_record - ack vector record
*
* ACK vector record as defined in Appendix A of spec.
*
* The list is sorted by dccpavr_ack_seqno
*
* @dccpavr_node - node in dccpav_records
* @dccpavr_ack_seqno - sequence number of the packet this record was sent on
* @dccpavr_ack_ackno - sequence number being acknowledged
* @dccpavr_ack_ptr - pointer into dccpav_buf where this record starts
* @dccpavr_ack_nonce - dccpav_buf_nonce at the time this record was sent
* @dccpavr_sent_len - length of the record in dccpav_buf
*/
struct dccp_ackvec_record {
struct list_head dccpavr_node;
u64 dccpavr_ack_seqno;
u64 dccpavr_ack_ackno;
u16 dccpavr_ack_ptr;
u16 dccpavr_sent_len;
u8 dccpavr_ack_nonce;
};
struct sock;
struct sk_buff;
#ifdef CONFIG_IP_DCCP_ACKVEC
extern int dccp_ackvec_init(void);
extern void dccp_ackvec_exit(void);
extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
extern void dccp_ackvec_free(struct dccp_ackvec *av);
extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
const u64 ackno, const u8 state);
extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
struct sock *sk, const u64 ackno);
extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
u64 *ackno, const u8 opt,
const u8 *value, const u8 len);
extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
/*
 * Number of Ack Vector bytes currently held in @av; non-zero means there is
 * Ack Vector state that has not been discarded yet.
 */
static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
{
return av->dccpav_vec_len;
}
#else /* CONFIG_IP_DCCP_ACKVEC */
/*
 * Stubs used when CONFIG_IP_DCCP_ACKVEC is not set.  Init/exit/free do
 * nothing, allocation yields NULL, nothing is ever pending, and the
 * operations that would need a real Ack Vector report failure (-1).
 */

/* Nothing to set up: always succeeds. */
static inline int dccp_ackvec_init(void)
{
return 0;
}
/* Nothing to tear down. */
static inline void dccp_ackvec_exit(void)
{
}
/* No Ack Vector support: there is never an object to hand out. */
static inline struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
{
return NULL;
}
/* Nothing was allocated, so nothing is freed. */
static inline void dccp_ackvec_free(struct dccp_ackvec *av)
{
}
/* Always fails with -1. */
static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
const u64 ackno, const u8 state)
{
return -1;
}
/* No records exist, so there is nothing to check. */
static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
struct sock *sk, const u64 ackno)
{
}
/*
 * Always fails with -1.
 * NOTE(review): @ackno is const-qualified here but not in the prototype used
 * when CONFIG_IP_DCCP_ACKVEC is set.
 */
static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
const u64 *ackno, const u8 opt,
const u8 *value, const u8 len)
{
return -1;
}
/*
 * Always fails with -1.
 * NOTE(review): both parameters are const-qualified here but not in the
 * prototype used when CONFIG_IP_DCCP_ACKVEC is set.
 */
static inline int dccp_insert_option_ackvec(const struct sock *sk,
const struct sk_buff *skb)
{
return -1;
}
/* Never anything pending. */
static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
{
return 0;
}
#endif /* CONFIG_IP_DCCP_ACKVEC */
#endif /* _ACKVEC_H */

256
net/dccp/ccid.c Normal file
View File

@@ -0,0 +1,256 @@
/*
* net/dccp/ccid.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* CCID infrastructure
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include "ccid.h"
static struct ccid_operations *ccids[CCID_MAX];
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
static atomic_t ccids_lockct = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(ccids_lock);
/*
 * The strategy is: modifications to the ccids vector are short, do not sleep
 * and are very rare, but read access should be free of any exclusive locks.
 *
 * Writer side: take ccids_lock; while readers are still announced in
 * ccids_lockct, drop the lock again, yield() and retry.  Returns with
 * ccids_lock held and no reader announced.
 */
static void ccids_write_lock(void)
{
	for (;;) {
		spin_lock(&ccids_lock);
		if (atomic_read(&ccids_lockct) == 0)
			break;
		spin_unlock(&ccids_lock);
		yield();
	}
}
/* Writer side: release ccids_lock taken by ccids_write_lock(). */
static inline void ccids_write_unlock(void)
{
spin_unlock(&ccids_lock);
}
/*
 * Reader side: announce this reader in ccids_lockct first, then wait until
 * ccids_lock is not held.  The reader never takes the spinlock itself.
 */
static inline void ccids_read_lock(void)
{
atomic_inc(&ccids_lockct);
spin_unlock_wait(&ccids_lock);
}
/* Reader side: withdraw the announcement made by ccids_read_lock(). */
static inline void ccids_read_unlock(void)
{
atomic_dec(&ccids_lockct);
}
#else
/*
 * With neither CONFIG_SMP nor CONFIG_PREEMPT set, the four lock operations
 * expand to empty statements.
 */
#define ccids_write_lock() do { } while(0)
#define ccids_write_unlock() do { } while(0)
#define ccids_read_lock() do { } while(0)
#define ccids_read_unlock() do { } while(0)
#endif
/*
 * Create a slab cache whose objects hold a struct ccid followed by @obj_size
 * bytes of private data.
 *
 * The cache name is built from the printf-style @fmt (limited to the 32-byte
 * local buffer) and duplicated with kstrdup(); ccid_kmem_cache_destroy()
 * frees that copy again.
 *
 * Returns the cache, or NULL if the name or the cache could not be allocated.
 */
static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...)
{
	char buf[32];
	char *name;
	struct kmem_cache *cache;
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	name = kstrdup(buf, GFP_KERNEL);
	if (name == NULL)
		return NULL;

	cache = kmem_cache_create(name, sizeof(struct ccid) + obj_size, 0,
				  SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (cache != NULL)
		return cache;

	/* The cache was not created, so the name copy is not needed either */
	kfree(name);
	return NULL;
}
/*
 * Destroy a cache created by ccid_kmem_cache_create() and free its name.
 * A NULL @slab is ignored.
 */
static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
{
	const char *slab_name;

	if (slab == NULL)
		return;

	/* Fetch the name before the cache goes away, free it afterwards */
	slab_name = kmem_cache_name(slab);
	kmem_cache_destroy(slab);
	kfree(slab_name);
}
/*
 * Register a CCID implementation.
 *
 * Creates the rx and tx slab caches described by @ccid_ops and enters
 * @ccid_ops into the ccids table under its ccid_id.
 *
 * Returns 0 on success, -EINVAL if ccid_id does not fit into the ccids
 * table, -ENOBUFS if a slab cache could not be created, and -EEXIST if the
 * table slot is already taken.  On failure no slab cache is left behind.
 */
int ccid_register(struct ccid_operations *ccid_ops)
{
	int err = -ENOBUFS;

	/*
	 * ccid_id is an unsigned char, which can hold one value more than
	 * the ccids table has slots; refuse it instead of writing past the
	 * end of the table.
	 */
	if (ccid_ops->ccid_id >= sizeof(ccids) / sizeof(ccids[0]))
		return -EINVAL;

	ccid_ops->ccid_hc_rx_slab =
			ccid_kmem_cache_create(ccid_ops->ccid_hc_rx_obj_size,
					       "%s_hc_rx_sock",
					       ccid_ops->ccid_name);
	if (ccid_ops->ccid_hc_rx_slab == NULL)
		goto out;

	ccid_ops->ccid_hc_tx_slab =
			ccid_kmem_cache_create(ccid_ops->ccid_hc_tx_obj_size,
					       "%s_hc_tx_sock",
					       ccid_ops->ccid_name);
	if (ccid_ops->ccid_hc_tx_slab == NULL)
		goto out_free_rx_slab;

	ccids_write_lock();
	err = -EEXIST;
	if (ccids[ccid_ops->ccid_id] == NULL) {
		ccids[ccid_ops->ccid_id] = ccid_ops;
		err = 0;
	}
	ccids_write_unlock();
	if (err != 0)
		goto out_free_tx_slab;

	pr_info("CCID: Registered CCID %d (%s)\n",
		ccid_ops->ccid_id, ccid_ops->ccid_name);
out:
	return err;
out_free_tx_slab:
	ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab);
	ccid_ops->ccid_hc_tx_slab = NULL;
	goto out;
out_free_rx_slab:
	ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab);
	ccid_ops->ccid_hc_rx_slab = NULL;
	goto out;
}
EXPORT_SYMBOL_GPL(ccid_register);
/*
 * Unregister a CCID implementation: clear its slot in the ccids table and
 * destroy its rx and tx slab caches.  Always returns 0.
 */
int ccid_unregister(struct ccid_operations *ccid_ops)
{
	/*
	 * ccid_id is an unsigned char, which can hold one value more than
	 * the ccids table has slots; never write past the end of the table.
	 */
	if (ccid_ops->ccid_id < sizeof(ccids) / sizeof(ccids[0])) {
		ccids_write_lock();
		ccids[ccid_ops->ccid_id] = NULL;
		ccids_write_unlock();
	}

	ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab);
	ccid_ops->ccid_hc_tx_slab = NULL;
	ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab);
	ccid_ops->ccid_hc_rx_slab = NULL;

	pr_info("CCID: Unregistered CCID %d (%s)\n",
		ccid_ops->ccid_id, ccid_ops->ccid_name);
	return 0;
}
EXPORT_SYMBOL_GPL(ccid_unregister);
/*
 * Create a CCID instance of type @id for @sk.
 *
 * @rx selects the receiver half (non-zero) or the sender half (zero): it
 * decides which slab cache, private-area size and init hook are used.
 * @gfp is passed to the slab allocation; with CONFIG_KMOD it also decides
 * whether an unregistered CCID may be loaded via request_module().
 *
 * A reference on the owning module is taken for the lifetime of the
 * instance.  Returns the new instance, or NULL if @id is out of range or not
 * registered, the module reference could not be taken, the allocation
 * failed, or the init hook returned non-zero.
 */
struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp)
{
	struct ccid_operations *ccid_ops;
	struct ccid *ccid = NULL;

	/*
	 * id is an unsigned char, which can hold one value more than the
	 * ccids table has slots; such an id can never be registered.
	 */
	if (id >= sizeof(ccids) / sizeof(ccids[0]))
		goto out;

	ccids_read_lock();
#ifdef CONFIG_KMOD
	if (ccids[id] == NULL) {
		/* We only try to load if in process context */
		ccids_read_unlock();
		if (gfp & GFP_ATOMIC)
			goto out;
		request_module("net-dccp-ccid-%d", id);
		ccids_read_lock();
	}
#endif
	ccid_ops = ccids[id];
	if (ccid_ops == NULL)
		goto out_unlock;

	if (!try_module_get(ccid_ops->ccid_owner))
		goto out_unlock;

	ccids_read_unlock();

	ccid = kmem_cache_alloc(rx ? ccid_ops->ccid_hc_rx_slab :
				     ccid_ops->ccid_hc_tx_slab, gfp);
	if (ccid == NULL)
		goto out_module_put;
	ccid->ccid_ops = ccid_ops;
	/* The private area directly follows struct ccid; start it zeroed */
	if (rx) {
		memset(ccid + 1, 0, ccid_ops->ccid_hc_rx_obj_size);
		if (ccid->ccid_ops->ccid_hc_rx_init != NULL &&
		    ccid->ccid_ops->ccid_hc_rx_init(ccid, sk) != 0)
			goto out_free_ccid;
	} else {
		memset(ccid + 1, 0, ccid_ops->ccid_hc_tx_obj_size);
		if (ccid->ccid_ops->ccid_hc_tx_init != NULL &&
		    ccid->ccid_ops->ccid_hc_tx_init(ccid, sk) != 0)
			goto out_free_ccid;
	}
out:
	return ccid;
out_unlock:
	ccids_read_unlock();
	goto out;
out_free_ccid:
	kmem_cache_free(rx ? ccid_ops->ccid_hc_rx_slab :
			ccid_ops->ccid_hc_tx_slab, ccid);
	ccid = NULL;
out_module_put:
	module_put(ccid_ops->ccid_owner);
	goto out;
}
/* Create the receiver half of CCID @id for @sk: ccid_new() with rx set to 1. */
struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, gfp_t gfp)
{
return ccid_new(id, sk, 1, gfp);
}
EXPORT_SYMBOL_GPL(ccid_hc_rx_new);
/* Create the sender half of CCID @id for @sk: ccid_new() with rx set to 0. */
struct ccid *ccid_hc_tx_new(unsigned char id,struct sock *sk, gfp_t gfp)
{
return ccid_new(id, sk, 0, gfp);
}
EXPORT_SYMBOL_GPL(ccid_hc_tx_new);
/*
 * Destroy a CCID instance: run the rx or tx exit hook (if any) on @sk, give
 * the object back to the matching slab cache and, if the CCID is still
 * present in the ccids table, drop the module reference.
 *
 * @rx selects the receiver half (non-zero) or the sender half (zero).
 * A NULL @ccid is ignored.
 */
static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx)
{
	struct ccid_operations *ops;
	struct kmem_cache *slab;
	void (*exit_hook)(struct sock *sk);

	if (ccid == NULL)
		return;

	ops = ccid->ccid_ops;
	if (rx) {
		exit_hook = ops->ccid_hc_rx_exit;
		slab = ops->ccid_hc_rx_slab;
	} else {
		exit_hook = ops->ccid_hc_tx_exit;
		slab = ops->ccid_hc_tx_slab;
	}

	if (exit_hook != NULL)
		exit_hook(sk);
	kmem_cache_free(slab, ccid);

	ccids_read_lock();
	if (ccids[ops->ccid_id] != NULL)
		module_put(ops->ccid_owner);
	ccids_read_unlock();
}
/* Destroy the receiver half @ccid of @sk: ccid_delete() with rx set to 1. */
void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk)
{
ccid_delete(ccid, sk, 1);
}
EXPORT_SYMBOL_GPL(ccid_hc_rx_delete);
/* Destroy the sender half @ccid of @sk: ccid_delete() with rx set to 0. */
void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk)
{
ccid_delete(ccid, sk, 0);
}
EXPORT_SYMBOL_GPL(ccid_hc_tx_delete);

190
net/dccp/ccid.h Normal file
View File

@@ -0,0 +1,190 @@
#ifndef _CCID_H
#define _CCID_H
/*
* net/dccp/ccid.h
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* CCID infrastructure
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <net/sock.h>
#include <linux/compiler.h>
#include <linux/dccp.h>
#include <linux/list.h>
#include <linux/module.h>
#define CCID_MAX 255
struct tcp_info;
/**
 * struct ccid_operations - description of one CCID implementation
 *
 * @ccid_id - number of the CCID; index of this entry in the ccids table
 * @ccid_name - name used in log messages and in the slab cache names
 * @ccid_owner - module whose reference count is taken per CCID instance
 * @ccid_hc_rx_slab - slab cache for receiver instances, set up by
 * ccid_register()
 * @ccid_hc_rx_obj_size - size of the private area that follows struct ccid
 * in a receiver instance
 * @ccid_hc_tx_slab - slab cache for sender instances, set up by
 * ccid_register()
 * @ccid_hc_tx_obj_size - size of the private area that follows struct ccid
 * in a sender instance
 *
 * @ccid_hc_rx_init and @ccid_hc_tx_init are called by ccid_new() after the
 * private area has been zeroed; a non-zero return aborts the creation.
 * @ccid_hc_rx_exit and @ccid_hc_tx_exit are called by ccid_delete() before
 * the instance is freed.
 *
 * The remaining hooks are invoked through the inline wrappers of the same
 * name in this header.  Each hook may be NULL; the wrappers then do nothing
 * and return their default value.
 */
struct ccid_operations {
unsigned char ccid_id;
const char *ccid_name;
struct module *ccid_owner;
struct kmem_cache *ccid_hc_rx_slab;
__u32 ccid_hc_rx_obj_size;
struct kmem_cache *ccid_hc_tx_slab;
__u32 ccid_hc_tx_obj_size;
int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk);
int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk);
void (*ccid_hc_rx_exit)(struct sock *sk);
void (*ccid_hc_tx_exit)(struct sock *sk);
void (*ccid_hc_rx_packet_recv)(struct sock *sk,
struct sk_buff *skb);
int (*ccid_hc_rx_parse_options)(struct sock *sk,
unsigned char option,
unsigned char len, u16 idx,
unsigned char* value);
int (*ccid_hc_rx_insert_options)(struct sock *sk,
struct sk_buff *skb);
void (*ccid_hc_tx_packet_recv)(struct sock *sk,
struct sk_buff *skb);
int (*ccid_hc_tx_parse_options)(struct sock *sk,
unsigned char option,
unsigned char len, u16 idx,
unsigned char* value);
int (*ccid_hc_tx_send_packet)(struct sock *sk,
struct sk_buff *skb);
void (*ccid_hc_tx_packet_sent)(struct sock *sk,
int more, unsigned int len);
void (*ccid_hc_rx_get_info)(struct sock *sk,
struct tcp_info *info);
void (*ccid_hc_tx_get_info)(struct sock *sk,
struct tcp_info *info);
int (*ccid_hc_rx_getsockopt)(struct sock *sk,
const int optname, int len,
u32 __user *optval,
int __user *optlen);
int (*ccid_hc_tx_getsockopt)(struct sock *sk,
const int optname, int len,
u32 __user *optval,
int __user *optlen);
};
extern int ccid_register(struct ccid_operations *ccid_ops);
extern int ccid_unregister(struct ccid_operations *ccid_ops);
/**
 * struct ccid - one CCID instance (either the rx or the tx half)
 *
 * @ccid_ops - operations of the CCID implementation this instance belongs to
 * @ccid_priv - start of the implementation's private area; zero-length here,
 * the space is provided by the allocation made for the instance
 */
struct ccid {
struct ccid_operations *ccid_ops;
char ccid_priv[0];
};
/*
 * Return the private area of @ccid as an untyped pointer.  The const
 * qualifier of @ccid is cast away, so the result may be written through.
 */
static inline void *ccid_priv(const struct ccid *ccid)
{
return (void *)ccid->ccid_priv;
}
extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx,
gfp_t gfp);
extern struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk,
gfp_t gfp);
extern struct ccid *ccid_hc_tx_new(unsigned char id, struct sock *sk,
gfp_t gfp);
extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
/*
 * Call the CCID's ccid_hc_tx_send_packet hook for @skb on @sk and return its
 * result; returns 0 when the CCID does not provide the hook.
 */
static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
					 struct sk_buff *skb)
{
	if (ccid->ccid_ops->ccid_hc_tx_send_packet == NULL)
		return 0;

	return ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
}
/*
 * Call the CCID's ccid_hc_tx_packet_sent hook with @more and @len; does
 * nothing when the CCID does not provide the hook.
 */
static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
					  int more, unsigned int len)
{
	if (ccid->ccid_ops->ccid_hc_tx_packet_sent == NULL)
		return;

	ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len);
}
/*
 * Hand @skb to the CCID's ccid_hc_rx_packet_recv hook; does nothing when the
 * CCID does not provide the hook.
 */
static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
					  struct sk_buff *skb)
{
	if (ccid->ccid_ops->ccid_hc_rx_packet_recv == NULL)
		return;

	ccid->ccid_ops->ccid_hc_rx_packet_recv(sk, skb);
}
/*
 * Hand @skb to the CCID's ccid_hc_tx_packet_recv hook; does nothing when the
 * CCID does not provide the hook.
 */
static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
					  struct sk_buff *skb)
{
	if (ccid->ccid_ops->ccid_hc_tx_packet_recv == NULL)
		return;

	ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb);
}
/*
 * Pass one received option to the CCID's ccid_hc_tx_parse_options hook and
 * return its result; returns 0 when the CCID does not provide the hook.
 */
static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
					   unsigned char option,
					   unsigned char len, u16 idx,
					   unsigned char* value)
{
	if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
		return 0;

	return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, option, len, idx,
							value);
}
/*
 * Pass one received option to the CCID's ccid_hc_rx_parse_options hook and
 * return its result; returns 0 when the CCID does not provide the hook.
 */
static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
					   unsigned char option,
					   unsigned char len, u16 idx,
					   unsigned char* value)
{
	if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
		return 0;

	return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, option, len, idx,
							value);
}
/*
 * Let the CCID's ccid_hc_rx_insert_options hook add its options to @skb and
 * return the hook's result; returns 0 when the CCID does not provide the
 * hook.
 */
static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
					    struct sk_buff *skb)
{
	int rc = 0;

	if (ccid->ccid_ops->ccid_hc_rx_insert_options != NULL)
		rc = ccid->ccid_ops->ccid_hc_rx_insert_options(sk, skb);

	return rc;
}
/*
 * Call the CCID's ccid_hc_rx_get_info hook with @info; does nothing when the
 * CCID does not provide the hook.
 */
static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk,
				       struct tcp_info *info)
{
	if (ccid->ccid_ops->ccid_hc_rx_get_info == NULL)
		return;

	ccid->ccid_ops->ccid_hc_rx_get_info(sk, info);
}
/*
 * Call the CCID's ccid_hc_tx_get_info hook with @info; does nothing when the
 * CCID does not provide the hook.
 */
static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk,
				       struct tcp_info *info)
{
	if (ccid->ccid_ops->ccid_hc_tx_get_info == NULL)
		return;

	ccid->ccid_ops->ccid_hc_tx_get_info(sk, info);
}
/*
 * Forward a getsockopt request to the CCID's ccid_hc_rx_getsockopt hook and
 * return its result; returns -ENOPROTOOPT when the CCID does not provide the
 * hook.
 */
static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk,
					const int optname, int len,
					u32 __user *optval, int __user *optlen)
{
	if (ccid->ccid_ops->ccid_hc_rx_getsockopt == NULL)
		return -ENOPROTOOPT;

	return ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len,
						     optval, optlen);
}
/*
 * Forward a getsockopt request to the CCID's ccid_hc_tx_getsockopt hook and
 * return its result; returns -ENOPROTOOPT when the CCID does not provide the
 * hook.
 */
static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk,
					const int optname, int len,
					u32 __user *optval, int __user *optlen)
{
	if (ccid->ccid_ops->ccid_hc_tx_getsockopt == NULL)
		return -ENOPROTOOPT;

	return ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len,
						     optval, optlen);
}
#endif /* _CCID_H */

125
net/dccp/ccids/Kconfig Normal file
View File

@@ -0,0 +1,125 @@
menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
depends on IP_DCCP && EXPERIMENTAL
config IP_DCCP_CCID2
tristate "CCID2 (TCP-Like) (EXPERIMENTAL)"
depends on IP_DCCP
def_tristate IP_DCCP
select IP_DCCP_ACKVEC
---help---
CCID 2, TCP-like Congestion Control, denotes Additive Increase,
Multiplicative Decrease (AIMD) congestion control with behavior
modelled directly on TCP, including congestion window, slow start,
timeouts, and so forth [RFC 2581]. CCID 2 achieves maximum
bandwidth over the long term, consistent with the use of end-to-end
congestion control, but halves its congestion window in response to
each congestion event. This leads to the abrupt rate changes
typical of TCP. Applications should use CCID 2 if they prefer
maximum bandwidth utilization to steadiness of rate. This is often
the case for applications that are not playing their data directly
to the user. For example, a hypothetical application that
transferred files over DCCP, using application-level retransmissions
for lost packets, would prefer CCID 2 to CCID 3. On-line games may
also prefer CCID 2.
CCID 2 is further described in RFC 4341,
http://www.ietf.org/rfc/rfc4341.txt
This text was extracted from RFC 4340 (sec. 10.1),
http://www.ietf.org/rfc/rfc4340.txt
To compile this CCID as a module, choose M here: the module will be
called dccp_ccid2.
If in doubt, say M.
config IP_DCCP_CCID2_DEBUG
bool "CCID2 debugging messages"
depends on IP_DCCP_CCID2
---help---
Enable CCID2-specific debugging messages.
When compiling CCID2 as a module, this debugging output can
additionally be toggled by setting the ccid2_debug module
parameter to 0 or 1.
If in doubt, say N.
config IP_DCCP_CCID3
tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
depends on IP_DCCP
def_tristate IP_DCCP
---help---
CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
rate-controlled congestion control mechanism. TFRC is designed to
be reasonably fair when competing for bandwidth with TCP-like flows,
where a flow is "reasonably fair" if its sending rate is generally
within a factor of two of the sending rate of a TCP flow under the
same conditions. However, TFRC has a much lower variation of
throughput over time compared with TCP, which makes CCID 3 more
suitable than CCID 2 for applications such as streaming media where a
relatively smooth sending rate is of importance.
CCID 3 is further described in RFC 4342,
http://www.ietf.org/rfc/rfc4342.txt
The TFRC congestion control algorithms were initially described in
RFC 3448.
This text was extracted from RFC 4340 (sec. 10.2),
http://www.ietf.org/rfc/rfc4340.txt
To compile this CCID as a module, choose M here: the module will be
called dccp_ccid3.
If in doubt, say M.
config IP_DCCP_TFRC_LIB
depends on IP_DCCP_CCID3
def_tristate IP_DCCP_CCID3
config IP_DCCP_CCID3_DEBUG
bool "CCID3 debugging messages"
depends on IP_DCCP_CCID3
---help---
Enable CCID3-specific debugging messages.
When compiling CCID3 as a module, this debugging output can
additionally be toggled by setting the ccid3_debug module
parameter to 0 or 1.
If in doubt, say N.
config IP_DCCP_CCID3_RTO
int "Use higher bound for nofeedback timer"
default 100
depends on IP_DCCP_CCID3 && EXPERIMENTAL
---help---
Use a higher lower bound for the nofeedback timer expiration.
The TFRC nofeedback timer normally expires after the maximum of 4
RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
with a small RTT this can mean a high processing load and reduced
performance, since then the nofeedback timer is triggered very
frequently.
This option allows setting a higher lower bound for the nofeedback
timer value. Values are given in units of milliseconds.
A value of 0 disables this feature by enforcing the value specified
in RFC 3448. The following values have been suggested as bounds for
experimental use:
* 16-20ms to match the typical multimedia inter-frame interval
* 100ms as a reasonable compromise [default]
* 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
The default of 100ms is a compromise between a large value for
efficient DCCP implementations, and a small value to avoid disrupting
the network in times of congestion.
The purpose of the nofeedback timer is to slow DCCP down when there
is serious network congestion: experimenting with larger values should
therefore not be performed on WANs.
endmenu

9
net/dccp/ccids/Makefile Normal file
View File

@@ -0,0 +1,9 @@
obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o
dccp_ccid3-y := ccid3.o
obj-$(CONFIG_IP_DCCP_CCID2) += dccp_ccid2.o
dccp_ccid2-y := ccid2.o
obj-y += lib/

857
net/dccp/ccids/ccid2.c Normal file
View File

@@ -0,0 +1,857 @@
/*
* net/dccp/ccids/ccid2.c
*
* Copyright (c) 2005, 2006 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
*
* Changes to meet Linux coding standards, and DCCP infrastructure fixes.
*
* Copyright (c) 2006 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* This implementation should follow RFC 4341
*
* BUGS:
* - sequence number wrapping
*/
#include "../ccid.h"
#include "../dccp.h"
#include "ccid2.h"
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
static int ccid2_debug;
#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
/*
 * Debug-only consistency check of the TX sequence-history ring.
 *
 * Walks backwards (ccid2s_prev) from the head to the tail, counting the
 * entries in use and how many of them are still unacknowledged, and BUG()s if
 *  - sequence numbers do not strictly increase towards the head,
 *  - send times decrease towards the head,
 *  - the unacknowledged count differs from ccid2hctx_pipe, or
 *  - the full ring length is not ccid2hctx_seqbufc * CCID2_SEQBUF_LEN.
 */
static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
{
int len = 0;
int pipe = 0;
struct ccid2_seq *seqp = hctx->ccid2hctx_seqh;
/* there is data in the chain */
if (seqp != hctx->ccid2hctx_seqt) {
/* the head itself is the next free slot; newest used entry is head->prev */
seqp = seqp->ccid2s_prev;
len++;
if (!seqp->ccid2s_acked)
pipe++;
while (seqp != hctx->ccid2hctx_seqt) {
struct ccid2_seq *prev = seqp->ccid2s_prev;
len++;
if (!prev->ccid2s_acked)
pipe++;
/* packets are sent sequentially */
BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq);
BUG_ON(time_before(seqp->ccid2s_sent,
prev->ccid2s_sent));
seqp = prev;
}
}
BUG_ON(pipe != hctx->ccid2hctx_pipe);
ccid2_pr_debug("len of chain=%d\n", len);
/* keep walking backwards until the head is reached again: counts the unused part */
do {
seqp = seqp->ccid2s_prev;
len++;
} while (seqp != hctx->ccid2hctx_seqh);
ccid2_pr_debug("total len=%d\n", len);
BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN);
}
#else
#define ccid2_pr_debug(format, a...)
#define ccid2_hc_tx_check_sanity(hctx)
#endif
/*
 * Allocate @num history entries as one buffer, chain them into a circular
 * doubly-linked list and splice that list into the socket's history ring
 * between the current head and tail (or make it the ring if none exists).
 *
 * The buffer pointer is remembered in ccid2hctx_seqbuf[] so that it can be
 * kfree()d later.  Returns 0 on success, -ENOMEM if the pointer table is
 * full or the allocation fails.
 */
static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num,
				 gfp_t gfp)
{
	const size_t slots = sizeof(hctx->ccid2hctx_seqbuf) /
			     sizeof(struct ccid2_seq*);
	struct ccid2_seq *buf, *last;
	int idx;

	/* is there still room to remember another buffer pointer? */
	if (hctx->ccid2hctx_seqbufc >= slots)
		return -ENOMEM;

	buf = kmalloc(sizeof(*buf) * num, gfp);
	if (buf == NULL)
		return -ENOMEM;

	/* chain neighbouring entries in both directions */
	for (idx = 1; idx < num; idx++) {
		buf[idx - 1].ccid2s_next = &buf[idx];
		buf[idx].ccid2s_prev = &buf[idx - 1];
	}

	/* close the circle */
	last = &buf[num - 1];
	last->ccid2s_next = buf;
	buf->ccid2s_prev = last;

	if (hctx->ccid2hctx_seqbufc == 0) {
		/* very first buffer: head and tail both start here */
		hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = buf;
	} else {
		/* insert the new entries between the old head and the tail */
		hctx->ccid2hctx_seqh->ccid2s_next = buf;
		buf->ccid2s_prev = hctx->ccid2hctx_seqh;

		hctx->ccid2hctx_seqt->ccid2s_prev = last;
		last->ccid2s_next = hctx->ccid2hctx_seqt;
	}

	/* keep the original pointer so the buffer can be freed on exit */
	hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = buf;
	hctx->ccid2hctx_seqbufc++;

	return 0;
}
/*
 * Decide whether @skb may be sent now.
 *
 * Returns 0 if the packet may go out (always for non-data packets, and for
 * data packets when pipe < cwnd and no previous send is still pending), and
 * 1 otherwise.  Granting a data packet sets ccid2hctx_sendwait.
 */
static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
{
	struct ccid2_hc_tx_sock *hctx;

	switch (DCCP_SKB_CB(skb)->dccpd_type) {
	case 0: /* XXX data packets from userland come through like this */
	case DCCP_PKT_DATA:
	case DCCP_PKT_DATAACK:
		break;
	default:
		/* only data-carrying packets are congestion controlled */
		return 0;
	}

	hctx = ccid2_hc_tx_sk(sk);

	ccid2_pr_debug("pipe=%d cwnd=%d\n", hctx->ccid2hctx_pipe,
		       hctx->ccid2hctx_cwnd);

	/* window is full */
	if (hctx->ccid2hctx_pipe >= hctx->ccid2hctx_cwnd)
		return 1; /* XXX CCID should dequeue when ready instead of polling */

	/* the previously granted packet has not been reported as sent yet */
	if (hctx->ccid2hctx_sendwait)
		return 1; /* XXX CCID should dequeue when ready instead of polling */

	hctx->ccid2hctx_sendwait = 1;
	return 0;
}
/*
 * Set the local Ack Ratio (dccps_l_ack_ratio) to @val.  Any value other than
 * 2 is first capped at cwnd / 2, rounded up.  Warns if the result is <= 0.
 */
static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
{
	struct dccp_sock *dp = dccp_sk(sk);

	/*
	 * XXX -sorbo: I don't really agree with the val != 2 exception.  With
	 * cwnd == 1 the ack ratio should be 1 and never be allowed to become 2.
	 */
	if (val != 2) {
		const struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
		/* half the window, rounded up for odd window sizes */
		const int limit = hctx->ccid2hctx_cwnd / 2 +
				  (hctx->ccid2hctx_cwnd & 1);

		if (val > limit)
			val = limit;
	}

	ccid2_pr_debug("changing local ack ratio to %d\n", val);
	WARN_ON(val <= 0);
	dp->dccps_l_ack_ratio = val;
}
/*
 * Set the congestion window.  A request for 0 is raised to 1; anything that
 * is still below 1 afterwards is treated as a bug.
 */
static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val)
{
	const int cwnd = val ? val : 1;

	/* XXX do we need to change ack ratio? */
	ccid2_pr_debug("change cwnd to %d\n", cwnd);

	BUG_ON(cwnd < 1);
	hctx->ccid2hctx_cwnd = cwnd;
}
/* Store a new smoothed RTT value in ccid2hctx_srtt and log it when debugging. */
static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
{
ccid2_pr_debug("change SRTT to %ld\n", val);
hctx->ccid2hctx_srtt = val;
}
/* Single point of update for ccid2hctx_pipe; callers check the result for < 0. */
static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val)
{
hctx->ccid2hctx_pipe = val;
}
static void ccid2_start_rto_timer(struct sock *sk);
/*
 * RTO timer handler; @data is the struct sock pointer stored in the timer.
 *
 * If the socket is currently owned by user context the timer is simply
 * re-armed HZ/5 jiffies later.  Otherwise the RTO is doubled (capped at 60
 * seconds), the timer restarted, and the sender falls back to cwnd = 1 with
 * ssthresh = max(cwnd / 2, 2), an empty send history and cleared ack-ratio
 * bookkeeping.  Drops the socket reference with sock_put() on return.
 */
static void ccid2_hc_tx_rto_expire(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
	unsigned int thresh;

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* socket is busy: try again a little later */
		sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
			       jiffies + HZ / 5);
		goto out;
	}

	ccid2_pr_debug("RTO_EXPIRE\n");

	ccid2_hc_tx_check_sanity(hctx);

	/* exponential back-off, at most 60 seconds */
	hctx->ccid2hctx_rto <<= 1;
	if (hctx->ccid2hctx_rto / HZ > 60)
		hctx->ccid2hctx_rto = 60 * HZ;

	ccid2_start_rto_timer(sk);

	/* nothing is considered in flight any more */
	ccid2_change_pipe(hctx, 0);

	/* ssthresh becomes half the old window, but not less than 2 */
	thresh = hctx->ccid2hctx_cwnd >> 1;
	if (thresh < 2)
		thresh = 2;
	hctx->ccid2hctx_ssthresh = thresh;
	ccid2_change_cwnd(hctx, 1);

	/* forget everything that was sent so far */
	hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
	hctx->ccid2hctx_ssacks = 0;
	hctx->ccid2hctx_acks = 0;
	hctx->ccid2hctx_sent = 0;

	/* reset the ack ratio state as well */
	hctx->ccid2hctx_arsent = 0;
	hctx->ccid2hctx_ackloss = 0;
	hctx->ccid2hctx_rpseq = 0;
	hctx->ccid2hctx_rpdupack = -1;
	ccid2_change_l_ack_ratio(sk, 1);

	ccid2_hc_tx_check_sanity(hctx);
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
/*
 * Arm the RTO timer to fire ccid2hctx_rto jiffies from now.
 * The timer must not already be pending; callers either check
 * timer_pending() or stop the timer first.
 */
static void ccid2_start_rto_timer(struct sock *sk)
{
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->ccid2hctx_rto);
BUG_ON(timer_pending(&hctx->ccid2hctx_rtotimer));
sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
jiffies + hctx->ccid2hctx_rto);
}
static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
struct ccid2_seq *next;
u64 seq;
ccid2_hc_tx_check_sanity(hctx);
BUG_ON(!hctx->ccid2hctx_sendwait);
hctx->ccid2hctx_sendwait = 0;
ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe + 1);
BUG_ON(hctx->ccid2hctx_pipe < 0);
/* There is an issue. What if another packet is sent between
* packet_send() and packet_sent(). Then the sequence number would be
* wrong.
* -sorbo.
*/
seq = dp->dccps_gss;
hctx->ccid2hctx_seqh->ccid2s_seq = seq;
hctx->ccid2hctx_seqh->ccid2s_acked = 0;
hctx->ccid2hctx_seqh->ccid2s_sent = jiffies;
next = hctx->ccid2hctx_seqh->ccid2s_next;
/* check if we need to alloc more space */
if (next == hctx->ccid2hctx_seqt) {
int rc;
ccid2_pr_debug("allocating more space in history\n");
rc = ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_KERNEL);
BUG_ON(rc); /* XXX what do we do? */
next = hctx->ccid2hctx_seqh->ccid2s_next;
BUG_ON(next == hctx->ccid2hctx_seqt);
}
hctx->ccid2hctx_seqh = next;
ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
hctx->ccid2hctx_pipe);
hctx->ccid2hctx_sent++;
/* Ack Ratio. Need to maintain a concept of how many windows we sent */
hctx->ccid2hctx_arsent++;
/* We had an ack loss in this window... */
if (hctx->ccid2hctx_ackloss) {
if (hctx->ccid2hctx_arsent >= hctx->ccid2hctx_cwnd) {
hctx->ccid2hctx_arsent = 0;
hctx->ccid2hctx_ackloss = 0;
}
} else {
/* No acks lost up to now... */
/* decrease ack ratio if enough packets were sent */
if (dp->dccps_l_ack_ratio > 1) {
/* XXX don't calculate denominator each time */
int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio -
dp->dccps_l_ack_ratio;
denom = hctx->ccid2hctx_cwnd * hctx->ccid2hctx_cwnd / denom;
if (hctx->ccid2hctx_arsent >= denom) {
ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1);
hctx->ccid2hctx_arsent = 0;
}
} else {
/* we can't increase ack ratio further [1] */
hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/
}
}
/* setup RTO timer */
if (!timer_pending(&hctx->ccid2hctx_rtotimer))
ccid2_start_rto_timer(sk);
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe);
ccid2_pr_debug("Sent: seq=%llu\n", (unsigned long long)seq);
do {
struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
while (seqp != hctx->ccid2hctx_seqh) {
ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
(unsigned long long)seqp->ccid2s_seq,
seqp->ccid2s_acked, seqp->ccid2s_sent);
seqp = seqp->ccid2s_next;
}
} while (0);
ccid2_pr_debug("=========\n");
ccid2_hc_tx_check_sanity(hctx);
#endif
}
/*
 * Find the next Ack Vector option in the options area of @skb.
 *
 * XXX Lame code duplication (of the generic option parsing)!
 *
 * @offset:  byte offset into the options area at which to start scanning;
 *           pass 0 for the first call and the previous return value after
 *           that.
 * @vec:     set to the start of the option's value on success.
 * @veclen:  set to the length of the value (option length minus the type
 *           and length bytes) on success.
 *
 * Returns the offset to use for the next call, or -1 if no further Ack
 * Vector option was found or a malformed option was encountered.
 */
static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
unsigned char **vec, unsigned char *veclen)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
unsigned char *opt_ptr;
/* dccph_doff is counted in 32-bit words from the start of the header */
const unsigned char *opt_end = (unsigned char *)dh +
(dh->dccph_doff * 4);
unsigned char opt, len;
unsigned char *value;
BUG_ON(offset < 0);
options += offset;
opt_ptr = options;
if (opt_ptr >= opt_end)
return -1;
while (opt_ptr != opt_end) {
opt = *opt_ptr++;
len = 0;
value = NULL;
/* Check if this isn't a single byte option */
if (opt > DCCPO_MAX_RESERVED) {
if (opt_ptr == opt_end)
goto out_invalid_option;
len = *opt_ptr++;
/* a multi-byte option needs type, length and at least one value byte */
if (len < 3)
goto out_invalid_option;
/*
* Remove the type and len fields, leaving
* just the value size
*/
len -= 2;
value = opt_ptr;
opt_ptr += len;
if (opt_ptr > opt_end)
goto out_invalid_option;
}
switch (opt) {
case DCCPO_ACK_VECTOR_0:
case DCCPO_ACK_VECTOR_1:
*vec = value;
*veclen = len;
/* options already includes offset, so this is relative to the options start */
return offset + (opt_ptr - options);
}
}
return -1;
out_invalid_option:
DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
return -1;
}
/* Stop the RTO timer of @sk via sk_stop_timer(). */
static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
{
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
sk_stop_timer(sk, &hctx->ccid2hctx_rtotimer);
ccid2_pr_debug("deleted RTO timer\n");
}
/*
 * Process a newly acknowledged packet @seqp.
 *
 * 1. Window growth: below ssthresh (slow start) cwnd grows by one for every
 *    second ack, as long as the per-ack budget *@maxincr is not used up;
 *    otherwise cwnd grows by one after cwnd acks have been counted.
 * 2. RTT/RTO update: a sample is taken on the first ack, and afterwards only
 *    if more than srtt jiffies passed since the last sample.  All values are
 *    in jiffies:
 *        rttvar = 3/4 * rttvar + 1/4 * |srtt - r|
 *        srtt   = 7/8 * srtt   + 1/8 * r
 *        rto    = srtt + max(4 * rttvar, 1), kept within 1..60 seconds
 * 3. The RTO timer is restarted.
 */
static inline void ccid2_new_ack(struct sock *sk,
struct ccid2_seq *seqp,
unsigned int *maxincr)
{
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
/* slow start */
if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) {
hctx->ccid2hctx_acks = 0;
/* We can increase cwnd at most maxincr [ack_ratio/2] */
if (*maxincr) {
/* increase every 2 acks */
hctx->ccid2hctx_ssacks++;
if (hctx->ccid2hctx_ssacks == 2) {
ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd+1);
hctx->ccid2hctx_ssacks = 0;
*maxincr = *maxincr - 1;
}
} else {
/* increased cwnd enough for this single ack */
hctx->ccid2hctx_ssacks = 0;
}
} else {
/* congestion avoidance: one increment per cwnd acks */
hctx->ccid2hctx_ssacks = 0;
hctx->ccid2hctx_acks++;
if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) {
ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd + 1);
hctx->ccid2hctx_acks = 0;
}
}
/* update RTO */
if (hctx->ccid2hctx_srtt == -1 ||
time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) {
/* RTT sample: time elapsed since this packet was sent */
unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent;
int s;
/* first measurement */
if (hctx->ccid2hctx_srtt == -1) {
ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
r, jiffies,
(unsigned long long)seqp->ccid2s_seq);
ccid2_change_srtt(hctx, r);
hctx->ccid2hctx_rttvar = r >> 1;
} else {
/* RTTVAR */
long tmp = hctx->ccid2hctx_srtt - r;
long srtt;
/* absolute deviation of the sample from srtt */
if (tmp < 0)
tmp *= -1;
tmp >>= 2;
hctx->ccid2hctx_rttvar *= 3;
hctx->ccid2hctx_rttvar >>= 2;
hctx->ccid2hctx_rttvar += tmp;
/* SRTT */
srtt = hctx->ccid2hctx_srtt;
srtt *= 7;
srtt >>= 3;
tmp = r >> 3;
srtt += tmp;
ccid2_change_srtt(hctx, srtt);
}
s = hctx->ccid2hctx_rttvar << 2;
/* clock granularity is 1 when based on jiffies */
if (!s)
s = 1;
hctx->ccid2hctx_rto = hctx->ccid2hctx_srtt + s;
/* must be at least a second */
s = hctx->ccid2hctx_rto / HZ;
/* DCCP doesn't require this [but I like it cuz my code sux] */
#if 1
if (s < 1)
hctx->ccid2hctx_rto = HZ;
#endif
/* max 60 seconds */
if (s > 60)
hctx->ccid2hctx_rto = HZ * 60;
hctx->ccid2hctx_lastrtt = jiffies;
ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
hctx->ccid2hctx_rto, HZ, r);
hctx->ccid2hctx_sent = 0;
}
/* we got a new ack, so re-start RTO timer */
ccid2_hc_tx_kill_rto_timer(sk);
ccid2_start_rto_timer(sk);
}
/*
 * One packet has left the network (acknowledged or declared lost):
 * decrement pipe and stop the RTO timer once nothing is outstanding.
 */
static void ccid2_hc_tx_dec_pipe(struct sock *sk)
{
	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
	const int remaining = hctx->ccid2hctx_pipe - 1;

	ccid2_change_pipe(hctx, remaining);
	BUG_ON(hctx->ccid2hctx_pipe < 0);

	/* nothing in flight: no retransmission timeout can occur */
	if (!hctx->ccid2hctx_pipe)
		ccid2_hc_tx_kill_rto_timer(sk);
}
/*
 * React to a congestion indication (loss or ECN mark) for packet @seqp.
 *
 * Packets sent before the previous congestion event are ignored, so that
 * several losses from the same window cause only one reduction.  Otherwise
 * cwnd is halved and ssthresh is set to the new cwnd, but not below 2.
 */
static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx,
				   struct ccid2_seq *seqp)
{
	unsigned int thresh;

	if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) {
		ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
		return;
	}

	hctx->ccid2hctx_last_cong = jiffies;

	ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd >> 1);

	/* ssthresh follows the reduced window, with a floor of 2 */
	thresh = hctx->ccid2hctx_cwnd;
	if (thresh < 2)
		thresh = 2;
	hctx->ccid2hctx_ssthresh = thresh;
}
/*
 * Sender-side processing of a received packet.
 *
 * 1. Reverse path: tracks the sequence numbers of incoming packets; after
 *    ccid2hctx_numdupack out-of-order arrivals the local Ack Ratio is
 *    doubled.
 * 2. Forward path (Ack and DataAck packets only, and only while packets are
 *    outstanding): walks every Ack Vector option of the packet, marking the
 *    matching history entries as acknowledged (ccid2_new_ack()) or as a
 *    congestion event for ECN-marked packets.
 * 3. Loss detection: once ccid2hctx_numdupack packets at or below the
 *    highest ack number seen are acknowledged, every older unacknowledged
 *    packet is declared lost.
 * 4. Acknowledged entries are trimmed from the tail of the history.
 *
 * The history ring is ordered from the tail (oldest) to the head (newest)
 * along ccid2s_next.  Ack Vectors are processed from the highest sequence
 * number downwards, so within one run length the ring has to be followed
 * along ccid2s_prev; stepping along ccid2s_next would leave the run after
 * its newest packet and miss all older packets covered by the same run.
 */
static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
	u64 ackno, seqno;
	struct ccid2_seq *seqp;
	unsigned char *vector;
	unsigned char veclen;
	int offset = 0;
	int done = 0;
	unsigned int maxincr = 0;

	ccid2_hc_tx_check_sanity(hctx);
	/* check reverse path congestion */
	seqno = DCCP_SKB_CB(skb)->dccpd_seq;

	/* XXX this whole "algorithm" is broken.  Need to fix it to keep track
	 * of the seqnos of the dupacks so that rpseq and rpdupack are correct
	 * -sorbo.
	 */
	/* need to bootstrap */
	if (hctx->ccid2hctx_rpdupack == -1) {
		hctx->ccid2hctx_rpdupack = 0;
		hctx->ccid2hctx_rpseq = seqno;
	} else {
		/* check if packet is consecutive */
		if ((hctx->ccid2hctx_rpseq + 1) == seqno)
			hctx->ccid2hctx_rpseq++;
		/* it's a later packet */
		else if (after48(seqno, hctx->ccid2hctx_rpseq)) {
			hctx->ccid2hctx_rpdupack++;

			/* check if we got enough dupacks */
			if (hctx->ccid2hctx_rpdupack >=
			    hctx->ccid2hctx_numdupack) {
				hctx->ccid2hctx_rpdupack = -1; /* XXX lame */
				hctx->ccid2hctx_rpseq = 0;

				ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio << 1);
			}
		}
	}

	/* check forward path congestion */
	/* still didn't send out new data packets */
	if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt)
		return;

	switch (DCCP_SKB_CB(skb)->dccpd_type) {
	case DCCP_PKT_ACK:
	case DCCP_PKT_DATAACK:
		break;
	default:
		return;
	}

	ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
	if (after48(ackno, hctx->ccid2hctx_high_ack))
		hctx->ccid2hctx_high_ack = ackno;

	/* start at the first entry not older than ackno, else the newest one */
	seqp = hctx->ccid2hctx_seqt;
	while (before48(seqp->ccid2s_seq, ackno)) {
		seqp = seqp->ccid2s_next;
		if (seqp == hctx->ccid2hctx_seqh) {
			seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
			break;
		}
	}

	/* If in slow-start, cwnd can increase at most Ack Ratio / 2 packets for
	 * this single ack.  I round up.
	 * -sorbo.
	 */
	maxincr = dp->dccps_l_ack_ratio >> 1;
	maxincr++;

	/* go through all ack vectors */
	while ((offset = ccid2_ackvector(sk, skb, offset,
					 &vector, &veclen)) != -1) {
		/* go through this ack vector */
		while (veclen--) {
			const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
			u64 ackno_end_rl;

			/* this cell covers sequence numbers ackno_end_rl..ackno */
			dccp_set_seqno(&ackno_end_rl, ackno - rl);
			ccid2_pr_debug("ackvec start:%llu end:%llu\n",
				       (unsigned long long)ackno,
				       (unsigned long long)ackno_end_rl);
			/* if the seqno we are analyzing is larger than the
			 * current ackno, then move towards the tail of our
			 * seqnos.
			 */
			while (after48(seqp->ccid2s_seq, ackno)) {
				if (seqp == hctx->ccid2hctx_seqt) {
					done = 1;
					break;
				}
				seqp = seqp->ccid2s_prev;
			}
			if (done)
				break;

			/* check all seqnos in the range of the vector
			 * run length
			 */
			while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
				const u8 state = *vector &
						 DCCP_ACKVEC_STATE_MASK;

				/* new packet received or marked */
				if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
				    !seqp->ccid2s_acked) {
					if (state ==
					    DCCP_ACKVEC_STATE_ECN_MARKED) {
						ccid2_congestion_event(hctx,
								       seqp);
					} else
						ccid2_new_ack(sk, seqp,
							      &maxincr);

					seqp->ccid2s_acked = 1;
					ccid2_pr_debug("Got ack for %llu\n",
						       (unsigned long long)seqp->ccid2s_seq);
					ccid2_hc_tx_dec_pipe(sk);
				}
				if (seqp == hctx->ccid2hctx_seqt) {
					done = 1;
					break;
				}
				/* older packets of this run lie towards the tail */
				seqp = seqp->ccid2s_prev;
			}
			if (done)
				break;

			/* the next cell starts just below this run */
			dccp_set_seqno(&ackno, ackno_end_rl - 1);
			vector++;
		}
		if (done)
			break;
	}

	/* The state about what is acked should be correct now
	 * Check for NUMDUPACK
	 */
	seqp = hctx->ccid2hctx_seqt;
	while (before48(seqp->ccid2s_seq, hctx->ccid2hctx_high_ack)) {
		seqp = seqp->ccid2s_next;
		if (seqp == hctx->ccid2hctx_seqh) {
			seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
			break;
		}
	}
	/* from here on, done counts acknowledged packets */
	done = 0;
	while (1) {
		if (seqp->ccid2s_acked) {
			done++;
			if (done == hctx->ccid2hctx_numdupack)
				break;
		}
		if (seqp == hctx->ccid2hctx_seqt)
			break;
		seqp = seqp->ccid2s_prev;
	}

	/* If there are at least 3 acknowledgements, anything unacknowledged
	 * below the last sequence number is considered lost
	 */
	if (done == hctx->ccid2hctx_numdupack) {
		struct ccid2_seq *last_acked = seqp;

		/* check for lost packets */
		while (1) {
			if (!seqp->ccid2s_acked) {
				ccid2_pr_debug("Packet lost: %llu\n",
					       (unsigned long long)seqp->ccid2s_seq);
				/* XXX need to traverse from tail -> head in
				 * order to detect multiple congestion events in
				 * one ack vector.
				 */
				ccid2_congestion_event(hctx, seqp);
				ccid2_hc_tx_dec_pipe(sk);
			}
			if (seqp == hctx->ccid2hctx_seqt)
				break;
			seqp = seqp->ccid2s_prev;
		}

		hctx->ccid2hctx_seqt = last_acked;
	}

	/* trim acked packets in tail */
	while (hctx->ccid2hctx_seqt != hctx->ccid2hctx_seqh) {
		if (!hctx->ccid2hctx_seqt->ccid2s_acked)
			break;

		hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next;
	}

	ccid2_hc_tx_check_sanity(hctx);
}
static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
{
struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid);
ccid2_change_cwnd(hctx, 1);
/* Initialize ssthresh to infinity. This means that we will exit the
* initial slow-start after the first packet loss. This is what we
* want.
*/
hctx->ccid2hctx_ssthresh = ~0;
hctx->ccid2hctx_numdupack = 3;
hctx->ccid2hctx_seqbufc = 0;
/* XXX init ~ to window size... */
if (ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_ATOMIC) != 0)
return -ENOMEM;
hctx->ccid2hctx_sent = 0;
hctx->ccid2hctx_rto = 3 * HZ;
ccid2_change_srtt(hctx, -1);
hctx->ccid2hctx_rttvar = -1;
hctx->ccid2hctx_lastrtt = 0;
hctx->ccid2hctx_rpdupack = -1;
hctx->ccid2hctx_last_cong = jiffies;
hctx->ccid2hctx_high_ack = 0;
hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire;
hctx->ccid2hctx_rtotimer.data = (unsigned long)sk;
init_timer(&hctx->ccid2hctx_rtotimer);
ccid2_hc_tx_check_sanity(hctx);
return 0;
}
/*
 * Tear down the CCID2 sender state: stop the RTO timer and release every
 * history buffer recorded in ccid2hctx_seqbuf[].
 */
static void ccid2_hc_tx_exit(struct sock *sk)
{
	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
	struct ccid2_seq **bufp;
	int left;

	ccid2_hc_tx_kill_rto_timer(sk);

	/* free the buffers in the order in which they were allocated */
	bufp = hctx->ccid2hctx_seqbuf;
	left = hctx->ccid2hctx_seqbufc;
	while (left-- > 0)
		kfree(*bufp++);

	hctx->ccid2hctx_seqbufc = 0;
}
/*
 * Receiver side: count incoming Data/DataAck packets and send an Ack once
 * the count reaches the remote Ack Ratio (dccps_r_ack_ratio).
 * Other packet types are ignored.
 */
static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	struct ccid2_hc_rx_sock *hcrx = ccid2_hc_rx_sk(sk);

	switch (DCCP_SKB_CB(skb)->dccpd_type) {
	case DCCP_PKT_DATA:
	case DCCP_PKT_DATAACK:
		break;
	default:
		return;
	}

	hcrx->ccid2hcrx_data++;
	if (hcrx->ccid2hcrx_data < dp->dccps_r_ack_ratio)
		return;

	/* enough data packets seen: acknowledge and start counting again */
	dccp_send_ack(sk);
	hcrx->ccid2hcrx_data = 0;
}
/*
 * Operations table registered with the CCID infrastructure at module load.
 * CCID2 provides init/exit and the send/sent/recv hooks on the TX side and
 * only a packet_recv hook on the RX side.
 */
static struct ccid_operations ccid2 = {
.ccid_id = DCCPC_CCID2,
.ccid_name = "ccid2",
.ccid_owner = THIS_MODULE,
/* size of the per-socket private TX state */
.ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
.ccid_hc_tx_init = ccid2_hc_tx_init,
.ccid_hc_tx_exit = ccid2_hc_tx_exit,
.ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
.ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
.ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
/* size of the per-socket private RX state */
.ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
.ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
};
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
module_param(ccid2_debug, int, 0444);
MODULE_PARM_DESC(ccid2_debug, "Enable debug messages");
#endif
/* Module entry point: register the CCID2 operations; returns ccid_register()'s result. */
static __init int ccid2_module_init(void)
{
return ccid_register(&ccid2);
}
module_init(ccid2_module_init);
/* Module exit point: unregister the CCID2 operations again. */
static __exit void ccid2_module_exit(void)
{
ccid_unregister(&ccid2);
}
module_exit(ccid2_module_exit);
MODULE_AUTHOR("Andrea Bittau <a.bittau@cs.ucl.ac.uk>");
MODULE_DESCRIPTION("DCCP TCP-Like (CCID2) CCID");
MODULE_LICENSE("GPL");
MODULE_ALIAS("net-dccp-ccid-2");

91
net/dccp/ccids/ccid2.h Normal file
View File

@@ -0,0 +1,91 @@
/*
* net/dccp/ccids/ccid2.h
*
* Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _DCCP_CCID2_H_
#define _DCCP_CCID2_H_
#include <linux/dccp.h>
#include <linux/timer.h>
#include <linux/types.h>
#include "../ccid.h"
struct sock;
/*
 * One entry of the sender's packet history; entries form a circular
 * doubly-linked ring (see ccid2_hc_tx_alloc_seq()).
 */
struct ccid2_seq {
/* sequence number the packet was sent with */
u64 ccid2s_seq;
/* send time in jiffies */
unsigned long ccid2s_sent;
/* non-zero once the packet has been acknowledged */
int ccid2s_acked;
/* neighbour towards the tail (older entries) */
struct ccid2_seq *ccid2s_prev;
/* neighbour towards the head (newer entries) */
struct ccid2_seq *ccid2s_next;
};
#define CCID2_SEQBUF_LEN 1024
#define CCID2_SEQBUF_MAX 128
/** struct ccid2_hc_tx_sock - CCID2 TX half connection
 *
 * @ccid2hctx_cwnd - congestion window, in packets
 * @ccid2hctx_ssacks - ACKs recv in slow start
 * @ccid2hctx_acks - ACKS recv in AI phase
 * @ccid2hctx_ssthresh - slow-start threshold, in packets
 * @ccid2hctx_pipe - packets sent but not yet acknowledged or declared lost
 * @ccid2hctx_numdupack - number of acks/dupacks that triggers loss handling
 * @ccid2hctx_seqbuf - history buffers allocated so far (kept for kfree)
 * @ccid2hctx_seqbufc - number of used entries in @ccid2hctx_seqbuf
 * @ccid2hctx_seqh - head of the history ring (next free entry)
 * @ccid2hctx_seqt - tail of the history ring (oldest outstanding entry)
 * @ccid2hctx_rto - retransmission timeout, in jiffies
 * @ccid2hctx_srtt - smoothed RTT in jiffies, -1 before the first sample
 * @ccid2hctx_rttvar - RTT variance estimate, in jiffies
 * @ccid2hctx_sent - packets sent in this window
 * @ccid2hctx_lastrtt - time RTT was last measured
 * @ccid2hctx_rtotimer - retransmission timeout timer
 * @ccid2hctx_arsent - packets sent [ack ratio]
 * @ccid2hctx_ackloss - ack was lost in this win
 * @ccid2hctx_rpseq - last consecutive seqno
 * @ccid2hctx_rpdupack - dupacks since rpseq
 * @ccid2hctx_sendwait - set while a granted packet has not been reported sent
 * @ccid2hctx_last_cong - time (jiffies) of the last congestion event
 * @ccid2hctx_high_ack - highest acknowledgment number received so far
 */
struct ccid2_hc_tx_sock {
int ccid2hctx_cwnd;
int ccid2hctx_ssacks;
int ccid2hctx_acks;
unsigned int ccid2hctx_ssthresh;
int ccid2hctx_pipe;
int ccid2hctx_numdupack;
struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
int ccid2hctx_seqbufc;
struct ccid2_seq *ccid2hctx_seqh;
struct ccid2_seq *ccid2hctx_seqt;
long ccid2hctx_rto;
long ccid2hctx_srtt;
long ccid2hctx_rttvar;
int ccid2hctx_sent;
unsigned long ccid2hctx_lastrtt;
struct timer_list ccid2hctx_rtotimer;
unsigned long ccid2hctx_arsent;
int ccid2hctx_ackloss;
u64 ccid2hctx_rpseq;
int ccid2hctx_rpdupack;
int ccid2hctx_sendwait;
unsigned long ccid2hctx_last_cong;
u64 ccid2hctx_high_ack;
};
/* CCID2 RX half connection state. */
struct ccid2_hc_rx_sock {
/* data packets received since the last Ack was sent */
int ccid2hcrx_data;
};
/* Return the CCID2 TX state stored in the private area of @sk's TX CCID. */
static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk)
{
return ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid);
}
/* Return the CCID2 RX state stored in the private area of @sk's RX CCID. */
static inline struct ccid2_hc_rx_sock *ccid2_hc_rx_sk(const struct sock *sk)
{
return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid);
}
#endif /* _DCCP_CCID2_H_ */

1288
net/dccp/ccids/ccid3.c Normal file

File diff suppressed because it is too large Load Diff

184
net/dccp/ccids/ccid3.h Normal file
View File

@@ -0,0 +1,184 @@
/*
* net/dccp/ccids/ccid3.h
*
* Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
*
* An implementation of the DCCP protocol
*
* This code has been developed by the University of Waikato WAND
* research group. For further information please see http://www.wand.net.nz/
* or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
*
* This code also uses code from Lulea University, rereleased as GPL by its
* authors:
* Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
*
* Changes to meet Linux coding standards, to make it meet latest ccid3 draft
* and to make it work as a loadable module in the DCCP stack written by
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
*
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _DCCP_CCID3_H_
#define _DCCP_CCID3_H_
#include <linux/list.h>
#include <linux/time.h>
#include <linux/types.h>
#include <linux/tfrc.h>
#include "../ccid.h"
/* Two seconds as per RFC 3448 4.2 */
#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
#define TFRC_T_MBI 64
/* What we think is a reasonable upper limit on RTT values */
#define CCID3_SANE_RTT_MAX ((suseconds_t)(4 * USEC_PER_SEC))
/*
 * Clamp @rtt to CCID3_SANE_RTT_MAX, logging via DCCP_CRIT when it was larger.
 * @rtt is assigned to and evaluated more than once, so it must be a plain
 * lvalue without side effects.
 */
#define CCID3_RTT_SANITY_CHECK(rtt) do { \
if (rtt > CCID3_SANE_RTT_MAX) { \
DCCP_CRIT("RTT (%d) too large, substituting %d", \
(int)rtt, (int)CCID3_SANE_RTT_MAX); \
rtt = CCID3_SANE_RTT_MAX; \
} } while (0)
/*
 * CCID3-specific option type codes.
 * NOTE(review): these appear to be the option numbers assigned in
 * RFC 4342, section 8 - confirm.
 */
enum ccid3_options {
TFRC_OPT_LOSS_EVENT_RATE = 192,
TFRC_OPT_LOSS_INTERVALS = 193,
TFRC_OPT_RECEIVE_RATE = 194,
};
/*
 * Values taken from the CCID3 options of a received packet.
 * NOTE(review): field meanings are inferred from their names; the code that
 * fills them in (ccid3.c) is not visible here - confirm.
 */
struct ccid3_options_received {
/* 48-bit sequence number packed together with a 16-bit index */
u64 ccid3or_seqno:48,
ccid3or_loss_intervals_idx:16;
u16 ccid3or_loss_intervals_len;
u32 ccid3or_loss_event_rate;
u32 ccid3or_receive_rate;
};
/*
 * TFRC sender states, stored in ccid3hctx_state.
 * NOTE(review): presumably NO_SENT = nothing sent yet, NO_FBACK = sent but no
 * feedback received, FBACK = feedback received, TERM = terminating - confirm
 * against ccid3.c.
 */
enum ccid3_hc_tx_states {
TFRC_SSTATE_NO_SENT = 1,
TFRC_SSTATE_NO_FBACK,
TFRC_SSTATE_FBACK,
TFRC_SSTATE_TERM,
};
/** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket
 *
 * @ccid3hctx_tfrc - embedded struct tfrc_tx_info; the x, x_recv, x_calc, rtt,
 *                   p, t_rto and t_ipi names below are #define aliases for
 *                   its members
 * @ccid3hctx_x - Current sending rate in 64 * bytes per second
 * @ccid3hctx_x_recv - Receive rate in 64 * bytes per second
 * @ccid3hctx_x_calc - Calculated rate in bytes per second
 * @ccid3hctx_rtt - Estimate of current round trip time in usecs
 * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
 * @ccid3hctx_s - Packet size in bytes
 * @ccid3hctx_t_rto - Nofeedback Timer setting in usecs
 * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
 * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states
 * @ccid3hctx_last_win_count - Last window counter sent
 * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
 * with last_win_count value sent
 * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
 * @ccid3hctx_idle - Flag indicating that sender is idling
 * @ccid3hctx_t_ld - Time last doubled during slow start
 * @ccid3hctx_t_nom - Nominal send time of next packet
 * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs
 * @ccid3hctx_hist - Packet history
 * @ccid3hctx_options_received - Parsed set of retrieved options
 */
struct ccid3_hc_tx_sock {
struct tfrc_tx_info ccid3hctx_tfrc;
#define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x
#define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv
#define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc
#define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt
#define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p
#define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto
#define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi
u16 ccid3hctx_s;
enum ccid3_hc_tx_states ccid3hctx_state:8;
u8 ccid3hctx_last_win_count;
u8 ccid3hctx_idle;
struct timeval ccid3hctx_t_last_win_count;
struct timer_list ccid3hctx_no_feedback_timer;
struct timeval ccid3hctx_t_ld;
struct timeval ccid3hctx_t_nom;
u32 ccid3hctx_delta;
struct list_head ccid3hctx_hist;
struct ccid3_options_received ccid3hctx_options_received;
};
/*
 * TFRC receiver states, stored in ccid3hcrx_state.
 * NOTE(review): presumably NO_DATA = no data packet received yet,
 * DATA = data received, TERM = terminating - confirm against ccid3.c.
 */
enum ccid3_hc_rx_states {
TFRC_RSTATE_NO_DATA = 1,
TFRC_RSTATE_DATA,
TFRC_RSTATE_TERM = 127,
};
/** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
 *
 * @ccid3hcrx_tfrc - embedded struct tfrc_rx_info; the x_recv, rtt and p names
 *                   below are #define aliases for its members
 * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3)
 * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard)
 * @ccid3hcrx_p - current loss event rate (RFC 3448 5.4)
 * @ccid3hcrx_seqno_nonloss - Last received non-loss sequence number
 * @ccid3hcrx_ccval_nonloss - Last received non-loss Window CCVal
 * @ccid3hcrx_ccval_last_counter - Tracks window counter (RFC 4342, 8.1)
 * @ccid3hcrx_state - receiver state, one of %ccid3_hc_rx_states
 * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes
 * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent
 * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent
 *	(NOTE(review): same wording as the field above; presumably the
 *	time of the last ack rather than of the last feedback - confirm)
 * @ccid3hcrx_hist - Packet history
 * @ccid3hcrx_li_hist - Loss Interval History
 * @ccid3hcrx_s - Received packet size in bytes
 * @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
 * @ccid3hcrx_elapsed_time - Time since packet reception
 */
struct ccid3_hc_rx_sock {
struct tfrc_rx_info ccid3hcrx_tfrc;
#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv
#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt
#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p
u64 ccid3hcrx_seqno_nonloss:48,
ccid3hcrx_ccval_nonloss:4,
ccid3hcrx_ccval_last_counter:4;
enum ccid3_hc_rx_states ccid3hcrx_state:8;
u32 ccid3hcrx_bytes_recv;
struct timeval ccid3hcrx_tstamp_last_feedback;
struct timeval ccid3hcrx_tstamp_last_ack;
struct list_head ccid3hcrx_hist;
struct list_head ccid3hcrx_li_hist;
u16 ccid3hcrx_s;
u32 ccid3hcrx_pinv;
u32 ccid3hcrx_elapsed_time;
};
/*
 * ccid3_hc_tx_sk - CCID3 sender state of a DCCP socket
 *
 * Returns what ccid_priv() yields for the socket's dccps_hc_tx_ccid,
 * typed as struct ccid3_hc_tx_sock.
 */
static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
{
	struct ccid3_hc_tx_sock *hctx = ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid);

	return hctx;
}
/*
 * ccid3_hc_rx_sk - CCID3 receiver state of a DCCP socket
 *
 * Returns what ccid_priv() yields for the socket's dccps_hc_rx_ccid,
 * typed as struct ccid3_hc_rx_sock.
 */
static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
{
	struct ccid3_hc_rx_sock *hcrx = ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid);

	return hcrx;
}
#endif /* _DCCP_CCID3_H_ */

View File

@@ -0,0 +1,3 @@
obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o
dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o

View File

@@ -0,0 +1,143 @@
/*
* net/dccp/ccids/lib/loss_interval.c
*
* Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
* Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/module.h>
#include <net/sock.h>
#include "../../dccp.h"
#include "loss_interval.h"
/*
 * dccp_li_hist_new - create a loss interval history descriptor
 * @name: suffix used to build the slab cache name "li_hist_<name>"
 *
 * Allocates the descriptor and a dedicated slab cache for
 * struct dccp_li_hist_entry objects. The cache name string is heap
 * allocated and stays owned by the cache until dccp_li_hist_delete().
 *
 * Returns the new descriptor, or NULL if any allocation fails (everything
 * allocated so far is released in that case).
 */
struct dccp_li_hist *dccp_li_hist_new(const char *name)
{
	static const char dccp_li_hist_mask[] = "li_hist_%s";
	struct dccp_li_hist *hist;
	char *slab_name;

	hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
	if (hist == NULL)
		return NULL;

	/* "%s" in the mask is replaced by @name, so this also covers the NUL */
	slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1,
			    GFP_ATOMIC);
	if (slab_name == NULL) {
		kfree(hist);
		return NULL;
	}
	sprintf(slab_name, dccp_li_hist_mask, name);

	hist->dccplih_slab = kmem_cache_create(slab_name,
					       sizeof(struct dccp_li_hist_entry),
					       0, SLAB_HWCACHE_ALIGN,
					       NULL, NULL);
	if (hist->dccplih_slab == NULL) {
		kfree(slab_name);
		kfree(hist);
		return NULL;
	}

	return hist;
}
EXPORT_SYMBOL_GPL(dccp_li_hist_new);
/*
 * dccp_li_hist_delete - tear down a descriptor made by dccp_li_hist_new()
 * @hist: descriptor to destroy
 *
 * The cache name is fetched before the cache is destroyed so that the
 * string can still be freed afterwards.
 */
void dccp_li_hist_delete(struct dccp_li_hist *hist)
{
	const char *slab_name = kmem_cache_name(hist->dccplih_slab);

	kmem_cache_destroy(hist->dccplih_slab);
	kfree(slab_name);
	kfree(hist);
}
EXPORT_SYMBOL_GPL(dccp_li_hist_delete);
/*
 * dccp_li_hist_purge - unlink and free every entry on a loss interval list
 * @hist: descriptor whose slab cache the entries were allocated from
 * @list: list head to empty
 */
void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list)
{
	struct dccp_li_hist_entry *cur, *tmp;

	/* _safe variant: the current node is removed while iterating */
	list_for_each_entry_safe(cur, tmp, list, dccplih_node) {
		list_del_init(&cur->dccplih_node);
		kmem_cache_free(hist->dccplih_slab, cur);
	}
}
EXPORT_SYMBOL_GPL(dccp_li_hist_purge);
/* Weights used to calculate loss event rate */
/*
 * These are integers as per section 8 of RFC3448, i.e. the weights are
 * stored scaled up so that no fractions are needed. The scale factor cancels
 * out in dccp_li_hist_calc_i_mean(), which divides the weighted sum by the
 * sum of the weights actually used.
 *
 * The table is indexed by an entry's position counted from the list head.
 */
static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = {
4, 4, 4, 4, 3, 2, 1, 1,
};
/*
 * dccp_li_hist_calc_i_mean - weighted mean of the recorded loss intervals
 * @list: loss interval history, walked from the list head
 *
 * Two weighted sums are built: i_tot0 weights the entry at position i with
 * dccp_li_hist_w[i], i_tot1 skips the head entry and weights position i with
 * dccp_li_hist_w[i - 1]. Entries whose interval is still ~0 (the placeholder
 * stored by dccp_li_hist_interval_new()) contribute to neither sum.
 *
 * Returns
 *   0                           if the list does not hold exactly
 *                               DCCP_LI_HIST_IVAL_F_LENGTH entries,
 *   1                           if no entry contributed (w_tot == 0),
 *   max(i_tot0, i_tot1) / w_tot otherwise.
 */
u32 dccp_li_hist_calc_i_mean(struct list_head *list)
{
	struct dccp_li_hist_entry *li_entry, *li_next;
	int i = 0;
	u32 i_tot;
	u32 i_tot0 = 0;
	u32 i_tot1 = 0;
	u32 w_tot = 0;

	list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
		/*
		 * More entries than weights: the list is over-long and the
		 * result is 0 anyway, so stop before indexing past the end
		 * of dccp_li_hist_w[].
		 */
		if (i >= DCCP_LI_HIST_IVAL_F_LENGTH)
			return 0;

		if (li_entry->dccplih_interval != ~0U) {
			i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
			w_tot += dccp_li_hist_w[i];
			if (i != 0)
				i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
		}
		i++;
	}

	/* Too few entries: no mean can be computed yet */
	if (i != DCCP_LI_HIST_IVAL_F_LENGTH)
		return 0;

	i_tot = max(i_tot0, i_tot1);

	if (!w_tot) {
		DCCP_WARN("w_tot = 0\n");
		return 1;
	}

	return i_tot / w_tot;
}
EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
/*
 * dccp_li_hist_interval_new - populate a loss interval list
 * @hist: descriptor providing the slab cache
 * @list: list head that receives the new entries
 * @seq_loss: sequence number stored in the entry that ends up at the head
 * @win_loss: window counter stored in that same entry
 *
 * Adds DCCP_LI_HIST_IVAL_F_LENGTH entries, each with its interval set to the
 * ~0 placeholder. Because list_add() inserts at the head, the entry created
 * last is the list head; it is the one tagged with @seq_loss / @win_loss.
 *
 * Returns 1 on success. On allocation failure the whole @list is purged and
 * 0 is returned.
 */
int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
	struct list_head *list, const u64 seq_loss, const u8 win_loss)
{
	struct dccp_li_hist_entry *newest;
	int filled = 0;

	while (filled < DCCP_LI_HIST_IVAL_F_LENGTH) {
		newest = dccp_li_hist_entry_new(hist, GFP_ATOMIC);
		if (newest == NULL) {
			dccp_li_hist_purge(hist, list);
			DCCP_BUG("loss interval list entry is NULL");
			return 0;
		}
		newest->dccplih_interval = ~0;
		list_add(&newest->dccplih_node, list);
		filled++;
	}

	newest->dccplih_seqno = seq_loss;
	newest->dccplih_win_count = win_loss;
	return 1;
}
EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new);

View File

@@ -0,0 +1,57 @@
#ifndef _DCCP_LI_HIST_
#define _DCCP_LI_HIST_
/*
* net/dccp/ccids/lib/loss_interval.h
*
* Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
* Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/time.h>
#define DCCP_LI_HIST_IVAL_F_LENGTH 8
/* Loss interval history descriptor: wraps the slab cache its entries come from */
struct dccp_li_hist {
/* Cache of struct dccp_li_hist_entry objects, created by dccp_li_hist_new() */
struct kmem_cache *dccplih_slab;
};
extern struct dccp_li_hist *dccp_li_hist_new(const char *name);
extern void dccp_li_hist_delete(struct dccp_li_hist *hist);
/* One element of a loss interval list */
struct dccp_li_hist_entry {
/* Linkage into the loss interval list */
struct list_head dccplih_node;
/* 48-bit sequence number and 4-bit window counter share one u64 */
u64 dccplih_seqno:48,
dccplih_win_count:4;
/* Interval value; ~0 is the placeholder set by dccp_li_hist_interval_new() */
u32 dccplih_interval;
};
/*
 * dccp_li_hist_entry_new - allocate one entry from @hist's slab cache
 * @hist: descriptor providing the cache
 * @prio: allocation flags passed to kmem_cache_alloc()
 *
 * Returns the entry or NULL; this helper does not initialise any field.
 */
static inline struct dccp_li_hist_entry *
dccp_li_hist_entry_new(struct dccp_li_hist *hist, const gfp_t prio)
{
	struct dccp_li_hist_entry *fresh = kmem_cache_alloc(hist->dccplih_slab,
							    prio);

	return fresh;
}
/*
 * dccp_li_hist_entry_delete - give an entry back to @hist's slab cache
 * @hist: descriptor providing the cache
 * @entry: entry to free; NULL is accepted and ignored
 */
static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist,
					     struct dccp_li_hist_entry *entry)
{
	if (entry == NULL)
		return;

	kmem_cache_free(hist->dccplih_slab, entry);
}
extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
struct list_head *list);
extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
struct list_head *list, const u64 seq_loss, const u8 win_loss);
#endif /* _DCCP_LI_HIST_ */

View File

@@ -0,0 +1,300 @@
/*
* net/dccp/ccids/lib/packet_history.c
*
* Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
*
* An implementation of the DCCP protocol
*
* This code has been developed by the University of Waikato WAND
* research group. For further information please see http://www.wand.net.nz/
* or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
*
* This code also uses code from Lulea University, rereleased as GPL by its
* authors:
* Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
*
* Changes to meet Linux coding standards, to make it meet latest ccid3 draft
* and to make it work as a loadable module in the DCCP stack written by
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
*
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/module.h>
#include <linux/string.h>
#include "packet_history.h"
/*
* Transmitter History Routines
*/
/*
 * dccp_tx_hist_new - create a transmitter packet history descriptor
 * @name: suffix used to build the slab cache name "tx_hist_<name>"
 *
 * Allocates the descriptor and a slab cache for struct dccp_tx_hist_entry
 * objects. The heap-allocated cache name stays with the cache until
 * dccp_tx_hist_delete().
 *
 * Returns the descriptor, or NULL if any allocation fails (partial
 * allocations are released).
 */
struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
{
	static const char dccp_tx_hist_mask[] = "tx_hist_%s";
	struct dccp_tx_hist *hist;
	char *slab_name;

	hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
	if (hist == NULL)
		return NULL;

	/* "%s" in the mask is replaced by @name, so this also covers the NUL */
	slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1,
			    GFP_ATOMIC);
	if (slab_name == NULL) {
		kfree(hist);
		return NULL;
	}
	sprintf(slab_name, dccp_tx_hist_mask, name);

	hist->dccptxh_slab = kmem_cache_create(slab_name,
					       sizeof(struct dccp_tx_hist_entry),
					       0, SLAB_HWCACHE_ALIGN,
					       NULL, NULL);
	if (hist->dccptxh_slab == NULL) {
		kfree(slab_name);
		kfree(hist);
		return NULL;
	}

	return hist;
}
EXPORT_SYMBOL_GPL(dccp_tx_hist_new);
/*
 * dccp_tx_hist_delete - tear down a descriptor made by dccp_tx_hist_new()
 * @hist: descriptor to destroy
 *
 * The cache name is fetched before the cache is destroyed so that the
 * string can still be freed afterwards.
 */
void dccp_tx_hist_delete(struct dccp_tx_hist *hist)
{
	const char *slab_name = kmem_cache_name(hist->dccptxh_slab);

	kmem_cache_destroy(hist->dccptxh_slab);
	kfree(slab_name);
	kfree(hist);
}
EXPORT_SYMBOL_GPL(dccp_tx_hist_delete);
struct dccp_tx_hist_entry *
dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq)
{
struct dccp_tx_hist_entry *packet = NULL, *entry;
list_for_each_entry(entry, list, dccphtx_node)
if (entry->dccphtx_seqno == seq) {
packet = entry;
break;
}
return packet;
}
EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
/*
 * dccp_tx_hist_purge - unlink and free every entry of a transmitter history
 * @hist: descriptor whose slab cache the entries came from
 * @list: list head to empty
 */
void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
{
	struct dccp_tx_hist_entry *cur, *tmp;

	/* _safe variant: the current node is removed while iterating */
	list_for_each_entry_safe(cur, tmp, list, dccphtx_node) {
		list_del_init(&cur->dccphtx_node);
		dccp_tx_hist_entry_delete(hist, cur);
	}
}
EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
/*
 * dccp_tx_hist_purge_older - drop everything that follows @packet on @list
 * @hist: descriptor whose slab cache the entries came from
 * @list: transmitter history list
 * @packet: entry to start after; it is kept, the entries behind it are freed
 *
 * The _continue iterator resumes at the element after @packet, so @packet
 * itself is left on the list. Since new entries are added at the head, the
 * entries behind @packet are the ones added before it.
 */
void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
			      struct list_head *list,
			      struct dccp_tx_hist_entry *packet)
{
	struct dccp_tx_hist_entry *tmp;

	list_for_each_entry_safe_continue(packet, tmp, list, dccphtx_node) {
		list_del_init(&packet->dccphtx_node);
		dccp_tx_hist_entry_delete(hist, packet);
	}
}
EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older);
/*
* Receiver History Routines
*/
/*
 * dccp_rx_hist_new - create a receiver packet history descriptor
 * @name: suffix used to build the slab cache name "rx_hist_<name>"
 *
 * Allocates the descriptor and a slab cache for struct dccp_rx_hist_entry
 * objects. The heap-allocated cache name stays with the cache until
 * dccp_rx_hist_delete().
 *
 * Returns the descriptor, or NULL if any allocation fails (partial
 * allocations are released).
 */
struct dccp_rx_hist *dccp_rx_hist_new(const char *name)
{
	static const char dccp_rx_hist_mask[] = "rx_hist_%s";
	struct dccp_rx_hist *hist;
	char *slab_name;

	hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
	if (hist == NULL)
		return NULL;

	/* "%s" in the mask is replaced by @name, so this also covers the NUL */
	slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1,
			    GFP_ATOMIC);
	if (slab_name == NULL) {
		kfree(hist);
		return NULL;
	}
	sprintf(slab_name, dccp_rx_hist_mask, name);

	hist->dccprxh_slab = kmem_cache_create(slab_name,
					       sizeof(struct dccp_rx_hist_entry),
					       0, SLAB_HWCACHE_ALIGN,
					       NULL, NULL);
	if (hist->dccprxh_slab == NULL) {
		kfree(slab_name);
		kfree(hist);
		return NULL;
	}

	return hist;
}
EXPORT_SYMBOL_GPL(dccp_rx_hist_new);
/*
 * dccp_rx_hist_delete - tear down a descriptor made by dccp_rx_hist_new()
 * @hist: descriptor to destroy
 *
 * The cache name is fetched before the cache is destroyed so that the
 * string can still be freed afterwards.
 */
void dccp_rx_hist_delete(struct dccp_rx_hist *hist)
{
	const char *slab_name = kmem_cache_name(hist->dccprxh_slab);

	kmem_cache_destroy(hist->dccprxh_slab);
	kfree(slab_name);
	kfree(hist);
}
EXPORT_SYMBOL_GPL(dccp_rx_hist_delete);
/*
 * dccp_rx_hist_find_entry - look up a received packet by sequence number
 * @list: receiver history list, searched from the head
 * @seq: sequence number to match
 * @ccval: on a hit, receives the matching entry's dccphrx_ccval;
 *         left untouched on a miss
 *
 * Returns 1 if an entry with @seq was found, 0 otherwise.
 */
int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
			    u8 *ccval)
{
	struct dccp_rx_hist_entry *cur;

	list_for_each_entry(cur, list, dccphrx_node) {
		if (cur->dccphrx_seqno == seq) {
			*ccval = cur->dccphrx_ccval;
			return 1;
		}
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry);
struct dccp_rx_hist_entry *
dccp_rx_hist_find_data_packet(const struct list_head *list)
{
struct dccp_rx_hist_entry *entry, *packet = NULL;
list_for_each_entry(entry, list, dccphrx_node)
if (entry->dccphrx_type == DCCP_PKT_DATA ||
entry->dccphrx_type == DCCP_PKT_DATAACK) {
packet = entry;
break;
}
return packet;
}
EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet);
/*
 * dccp_rx_hist_add_packet - insert a packet and trim the receiver history
 * @hist: descriptor whose slab cache the entries came from
 * @rx_list: receiver packet history; @packet is added at its head
 * @li_list: loss interval list; only tested for emptiness here
 * @packet: new history entry
 * @nonloss_seqno: sequence number used as trimming threshold when a loss
 *                 interval history exists
 *
 * After inserting @packet the list is walked from the head. The first
 * TFRC_RECV_NUM_LATE_LOSS + 1 data packets are always kept; num_later counts
 * them down. What happens to the entries behind them depends on whether
 * @li_list is empty, see the two branches below.
 */
void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
struct list_head *rx_list,
struct list_head *li_list,
struct dccp_rx_hist_entry *packet,
u64 nonloss_seqno)
{
struct dccp_rx_hist_entry *entry, *next;
/* initial value is overwritten below before it is ever read */
u8 num_later = 0;
list_add(&packet->dccphrx_node, rx_list);
num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
if (!list_empty(li_list)) {
/*
 * Loss interval history exists: once enough data packets have been
 * skipped, delete each remaining entry for which
 * after48(nonloss_seqno, seqno) holds (presumably: the entry is older
 * than nonloss_seqno - after48() is defined outside this file).
 */
list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
if (num_later == 0) {
if (after48(nonloss_seqno,
entry->dccphrx_seqno)) {
list_del_init(&entry->dccphrx_node);
dccp_rx_hist_entry_delete(hist, entry);
}
} else if (dccp_rx_hist_entry_data_packet(entry))
--num_later;
}
} else {
/*
 * step drives a small state machine that is advanced each time
 * num_later has counted down to zero:
 *   0 - initial batch of data packets skipped; skip one more
 *   1 - remember this entry's window counter, skip one more
 *   2 - compare window counters; either keep skipping or go to 3
 *   3 - delete this and every following entry
 */
int step = 0;
u8 win_count = 0; /* Not needed, but lets shut up gcc */
int tmp;
/*
* We have no loss interval history so we need at least one
* rtt:s of data packets to approximate rtt.
*/
list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
if (num_later == 0) {
switch (step) {
case 0:
step = 1;
/* OK, find next data packet */
num_later = 1;
break;
case 1:
step = 2;
/* OK, find next data packet */
num_later = 1;
/* reference window counter for the distance check in step 2 */
win_count = entry->dccphrx_ccval;
break;
case 2:
/* window counter distance, wrapped into 0..TFRC_WIN_COUNT_LIMIT-1 */
tmp = win_count - entry->dccphrx_ccval;
if (tmp < 0)
tmp += TFRC_WIN_COUNT_LIMIT;
if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
/*
* We have found a packet older
* than one rtt remove the rest
*/
/* this entry itself is kept; deletion starts with the next one */
step = 3;
} else /* OK, find next data packet */
num_later = 1;
break;
case 3:
/* num_later stays 0, so every remaining entry lands here */
list_del_init(&entry->dccphrx_node);
dccp_rx_hist_entry_delete(hist, entry);
break;
}
} else if (dccp_rx_hist_entry_data_packet(entry))
--num_later;
}
}
}
EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
/*
 * dccp_rx_hist_purge - unlink and free every entry of a receiver history
 * @hist: descriptor whose slab cache the entries came from
 * @list: list head to empty
 */
void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list)
{
	struct dccp_rx_hist_entry *cur, *tmp;

	/* _safe variant: the current node is removed while iterating */
	list_for_each_entry_safe(cur, tmp, list, dccphrx_node) {
		list_del_init(&cur->dccphrx_node);
		kmem_cache_free(hist->dccprxh_slab, cur);
	}
}
EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
"Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
MODULE_DESCRIPTION("DCCP TFRC library");
MODULE_LICENSE("GPL");

View File

@@ -0,0 +1,202 @@
/*
* net/dccp/ccids/lib/packet_history.h
*
* Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
*
* An implementation of the DCCP protocol
*
* This code has been developed by the University of Waikato WAND
* research group. For further information please see http://www.wand.net.nz/
* or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
*
* This code also uses code from Lulea University, rereleased as GPL by its
* authors:
* Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
*
* Changes to meet Linux coding standards, to make it meet latest ccid3 draft
* and to make it work as a loadable module in the DCCP stack written by
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
*
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _DCCP_PKT_HIST_
#define _DCCP_PKT_HIST_
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/time.h>
#include "../../dccp.h"
/* Number of later packets received before one is considered lost */
#define TFRC_RECV_NUM_LATE_LOSS 3
#define TFRC_WIN_COUNT_PER_RTT 4
#define TFRC_WIN_COUNT_LIMIT 16
/*
* Transmitter History data structures and declarations
*/
/* One element of a transmitter packet history list */
struct dccp_tx_hist_entry {
/* Linkage into the transmitter history list */
struct list_head dccphtx_node;
/*
 * 48-bit sequence number plus a 1-bit "sent" flag in one u64; the flag is
 * cleared by dccp_tx_hist_entry_new().
 */
u64 dccphtx_seqno:48,
dccphtx_sent:1;
u32 dccphtx_rtt;
struct timeval dccphtx_tstamp;
};
/* Transmitter history descriptor: wraps the slab cache its entries come from */
struct dccp_tx_hist {
/* Cache of struct dccp_tx_hist_entry objects */
struct kmem_cache *dccptxh_slab;
};
extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name);
extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist);
/*
 * dccp_tx_hist_entry_new - allocate a transmitter history entry
 * @hist: descriptor providing the slab cache
 * @prio: allocation flags passed to kmem_cache_alloc()
 *
 * Only the dccphtx_sent flag is initialised (to 0); all other fields are
 * left as returned by the allocator. Returns NULL on allocation failure.
 */
static inline struct dccp_tx_hist_entry *
dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, const gfp_t prio)
{
	struct dccp_tx_hist_entry *fresh;

	fresh = kmem_cache_alloc(hist->dccptxh_slab, prio);
	if (fresh == NULL)
		return NULL;

	fresh->dccphtx_sent = 0;
	return fresh;
}
/*
 * dccp_tx_hist_head - first entry of a transmitter history list
 * @list: list head
 *
 * Returns the entry at the head of @list, or NULL if the list is empty.
 */
static inline struct dccp_tx_hist_entry *
dccp_tx_hist_head(struct list_head *list)
{
	if (list_empty(list))
		return NULL;

	return list_entry(list->next, struct dccp_tx_hist_entry,
			  dccphtx_node);
}
extern struct dccp_tx_hist_entry *
dccp_tx_hist_find_entry(const struct list_head *list,
const u64 seq);
/*
 * dccp_tx_hist_add_entry - put @entry at the head of a transmitter history
 * @list: list head
 * @entry: entry to insert
 */
static inline void dccp_tx_hist_add_entry(struct list_head *list,
					  struct dccp_tx_hist_entry *entry)
{
	struct list_head *node = &entry->dccphtx_node;

	list_add(node, list);
}
/*
 * dccp_tx_hist_entry_delete - give an entry back to @hist's slab cache
 * @hist: descriptor providing the cache
 * @entry: entry to free; NULL is accepted and ignored
 */
static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
					     struct dccp_tx_hist_entry *entry)
{
	if (entry == NULL)
		return;

	kmem_cache_free(hist->dccptxh_slab, entry);
}
extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist,
struct list_head *list);
extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
struct list_head *list,
struct dccp_tx_hist_entry *next);
/*
* Receiver History data structures and declarations
*/
/*
 * One element of a receiver packet history list. All fields except the list
 * linkage are filled in by dccp_rx_hist_entry_new().
 */
struct dccp_rx_hist_entry {
/* Linkage into the receiver history list */
struct list_head dccphrx_node;
/*
 * Packed into one u64: sequence number (from DCCP_SKB_CB(skb)->dccpd_seq),
 * CCVal and packet type (both copied from the DCCP header).
 */
u64 dccphrx_seqno:48,
dccphrx_ccval:4,
dccphrx_type:4;
u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */
/* Set via dccp_timestamp() when the entry is created */
struct timeval dccphrx_tstamp;
};
/* Receiver history descriptor: wraps the slab cache its entries come from */
struct dccp_rx_hist {
/* Cache of struct dccp_rx_hist_entry objects */
struct kmem_cache *dccprxh_slab;
};
extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name);
extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist);
/*
 * dccp_rx_hist_entry_new - allocate and fill a receiver history entry
 * @hist: descriptor providing the slab cache
 * @sk: socket handed to dccp_timestamp() for the entry's time stamp
 * @ndp: value stored in dccphrx_ndp
 * @skb: received packet; sequence number, CCVal and type are taken from it
 * @prio: allocation flags passed to kmem_cache_alloc()
 *
 * The list linkage is not initialised here. Returns NULL on allocation
 * failure.
 */
static inline struct dccp_rx_hist_entry *
dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
		       const struct sock *sk,
		       const u32 ndp,
		       const struct sk_buff *skb,
		       const gfp_t prio)
{
	const struct dccp_hdr *dh;
	struct dccp_rx_hist_entry *fresh;

	fresh = kmem_cache_alloc(hist->dccprxh_slab, prio);
	if (fresh == NULL)
		return NULL;

	dh = dccp_hdr(skb);

	fresh->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
	fresh->dccphrx_ccval = dh->dccph_ccval;
	fresh->dccphrx_type = dh->dccph_type;
	fresh->dccphrx_ndp = ndp;
	dccp_timestamp(sk, &fresh->dccphrx_tstamp);

	return fresh;
}
/*
 * dccp_rx_hist_head - first entry of a receiver history list
 * @list: list head
 *
 * Returns the entry at the head of @list, or NULL if the list is empty.
 */
static inline struct dccp_rx_hist_entry *
dccp_rx_hist_head(struct list_head *list)
{
	if (list_empty(list))
		return NULL;

	return list_entry(list->next, struct dccp_rx_hist_entry,
			  dccphrx_node);
}
extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
u8 *ccval);
extern struct dccp_rx_hist_entry *
dccp_rx_hist_find_data_packet(const struct list_head *list);
extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
struct list_head *rx_list,
struct list_head *li_list,
struct dccp_rx_hist_entry *packet,
u64 nonloss_seqno);
/*
 * dccp_rx_hist_entry_delete - give an entry back to @hist's slab cache
 * @hist: descriptor providing the cache
 * @entry: entry to free; NULL is accepted and ignored
 */
static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
					     struct dccp_rx_hist_entry *entry)
{
	if (entry == NULL)
		return;

	kmem_cache_free(hist->dccprxh_slab, entry);
}
extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
struct list_head *list);
/*
 * dccp_rx_hist_entry_data_packet - does this history entry carry data?
 * @entry: receiver history entry
 *
 * Returns non-zero if the recorded packet type is DCCP_PKT_DATA or
 * DCCP_PKT_DATAACK, 0 otherwise.
 */
static inline int
dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry)
{
	const unsigned int type = entry->dccphrx_type;

	return type == DCCP_PKT_DATA || type == DCCP_PKT_DATAACK;
}
extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
struct list_head *li_list, u8 *win_loss);
#endif /* _DCCP_PKT_HIST_ */

43
net/dccp/ccids/lib/tfrc.h Normal file
View File

@@ -0,0 +1,43 @@
#ifndef _TFRC_H_
#define _TFRC_H_
/*
* net/dccp/ccids/lib/tfrc.h
*
* Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
* Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
* Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/types.h>
#include <asm/div64.h>
/*
 * scaled_div - integer-arithmetic division of type (a * 1000000) / b
 *
 * @b must not be 0 (enforced with BUG_ON). The multiplication is done in
 * 64 bits; do_div() then replaces the dividend with the quotient in place.
 */
static inline u64 scaled_div(u64 a, u32 b)
{
	u64 scaled;

	BUG_ON(b == 0);

	scaled = a * 1000000;
	do_div(scaled, b);

	return scaled;
}
/*
 * scaled_div32 - scaled_div() with the result clamped to 32 bits
 *
 * Returns (a * 1000000) / b if that fits into a u32; otherwise logs a
 * critical message and returns UINT_MAX.
 */
static inline u32 scaled_div32(u64 a, u32 b)
{
	const u64 quotient = scaled_div(a, b);

	if (quotient <= UINT_MAX)
		return quotient;

	DCCP_CRIT("Overflow: a(%llu)/b(%u) > ~0U",
		  (unsigned long long)a, b);
	return UINT_MAX;
}
extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
#endif /* _TFRC_H_ */

View File

@@ -0,0 +1,697 @@
/*
* net/dccp/ccids/lib/tfrc_equation.c
*
* Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
* Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
* Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/module.h>
#include "../../dccp.h"
#include "tfrc.h"
#define TFRC_CALC_X_ARRSIZE 500
#define TFRC_CALC_X_SPLIT 50000 /* 0.05 * 1000000, details below */
#define TFRC_SMALLEST_P (TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE)
/*
TFRC TCP Reno Throughput Equation Lookup Table for f(p)
The following two-column lookup table implements a part of the TCP throughput
equation from [RFC 3448, sec. 3.1]:
                                        s
X_calc = --------------------------------------------------------------
         R * sqrt(2*b*p/3) + (3 * t_RTO * sqrt(3*b*p/8) * (p + 32*p^3))
Where:
X is the transmit rate in bytes/second
s is the packet size in bytes
R is the round trip time in seconds
p is the loss event rate, between 0 and 1.0, of the number of loss
events as a fraction of the number of packets transmitted
t_RTO is the TCP retransmission timeout value in seconds
b is the number of packets acknowledged by a single TCP ACK
We can assume that b = 1 and t_RTO is 4 * R. The equation now becomes:
                                    s
X_calc = -------------------------------------------------------
         R * sqrt(p*2/3) + (12 * R * sqrt(p*3/8) * (p + 32*p^3))
which we can break down into:
             s
X_calc = ---------
         R * f(p)
where f(p) is given for 0 < p <= 1 by:
f(p) = sqrt(2*p/3) + 12 * sqrt(3*p/8) * (p + 32*p^3)
Since this is kernel code, floating-point arithmetic is avoided in favour of
integer arithmetic. This means that nearly all fractional parameters are
scaled by 1000000:
* the parameters p and R
* the return result f(p)
The lookup table therefore actually tabulates the following function g(q):
g(q) = 1000000 * f(q/1000000)
Hence, when p <= 1, q must be less than or equal to 1000000. To achieve finer
granularity for the practically more relevant case of small values of p (up to
5%), the second column is used; the first one ranges up to 100%. This split
corresponds to the value of q = TFRC_CALC_X_SPLIT. At the same time this also
determines the smallest resolution possible with this lookup table:
TFRC_SMALLEST_P = TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE
The entire table is generated by:
for(i=0; i < TFRC_CALC_X_ARRSIZE; i++) {
lookup[i][0] = g((i+1) * 1000000/TFRC_CALC_X_ARRSIZE);
lookup[i][1] = g((i+1) * TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE);
}
With the given configuration, we have, with M = TFRC_CALC_X_ARRSIZE-1,
lookup[0][0] = g(1000000/(M+1)) = 1000000 * f(0.2%)
lookup[M][0] = g(1000000) = 1000000 * f(100%)
lookup[0][1] = g(TFRC_SMALLEST_P) = 1000000 * f(0.01%)
lookup[M][1] = g(TFRC_CALC_X_SPLIT) = 1000000 * f(5%)
In summary, the two columns represent f(p) for the following ranges:
* The first column is for 0.002 <= p <= 1.0
* The second column is for 0.0001 <= p <= 0.05
Where the columns overlap, the second (finer-grained) is given preference,
i.e. the first column is used only for p >= 0.05.
*/
static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
{ 37172, 8172 },
{ 53499, 11567 },
{ 66664, 14180 },
{ 78298, 16388 },
{ 89021, 18339 },
{ 99147, 20108 },
{ 108858, 21738 },
{ 118273, 23260 },
{ 127474, 24693 },
{ 136520, 26052 },
{ 145456, 27348 },
{ 154316, 28589 },
{ 163130, 29783 },
{ 171919, 30935 },
{ 180704, 32049 },
{ 189502, 33130 },
{ 198328, 34180 },
{ 207194, 35202 },
{ 216114, 36198 },
{ 225097, 37172 },
{ 234153, 38123 },
{ 243294, 39055 },
{ 252527, 39968 },
{ 261861, 40864 },
{ 271305, 41743 },
{ 280866, 42607 },
{ 290553, 43457 },
{ 300372, 44293 },
{ 310333, 45117 },
{ 320441, 45929 },
{ 330705, 46729 },
{ 341131, 47518 },
{ 351728, 48297 },
{ 362501, 49066 },
{ 373460, 49826 },
{ 384609, 50577 },
{ 395958, 51320 },
{ 407513, 52054 },
{ 419281, 52780 },
{ 431270, 53499 },
{ 443487, 54211 },
{ 455940, 54916 },
{ 468635, 55614 },
{ 481581, 56306 },
{ 494785, 56991 },
{ 508254, 57671 },
{ 521996, 58345 },
{ 536019, 59014 },
{ 550331, 59677 },
{ 564939, 60335 },
{ 579851, 60988 },
{ 595075, 61636 },
{ 610619, 62279 },
{ 626491, 62918 },
{ 642700, 63553 },
{ 659253, 64183 },
{ 676158, 64809 },
{ 693424, 65431 },
{ 711060, 66050 },
{ 729073, 66664 },
{ 747472, 67275 },
{ 766266, 67882 },
{ 785464, 68486 },
{ 805073, 69087 },
{ 825103, 69684 },
{ 845562, 70278 },
{ 866460, 70868 },
{ 887805, 71456 },
{ 909606, 72041 },
{ 931873, 72623 },
{ 954614, 73202 },
{ 977839, 73778 },
{ 1001557, 74352 },
{ 1025777, 74923 },
{ 1050508, 75492 },
{ 1075761, 76058 },
{ 1101544, 76621 },
{ 1127867, 77183 },
{ 1154739, 77741 },
{ 1182172, 78298 },
{ 1210173, 78852 },
{ 1238753, 79405 },
{ 1267922, 79955 },
{ 1297689, 80503 },
{ 1328066, 81049 },
{ 1359060, 81593 },
{ 1390684, 82135 },
{ 1422947, 82675 },
{ 1455859, 83213 },
{ 1489430, 83750 },
{ 1523671, 84284 },
{ 1558593, 84817 },
{ 1594205, 85348 },
{ 1630518, 85878 },
{ 1667543, 86406 },
{ 1705290, 86932 },
{ 1743770, 87457 },
{ 1782994, 87980 },
{ 1822973, 88501 },
{ 1863717, 89021 },
{ 1905237, 89540 },
{ 1947545, 90057 },
{ 1990650, 90573 },
{ 2034566, 91087 },
{ 2079301, 91600 },
{ 2124869, 92111 },
{ 2171279, 92622 },
{ 2218543, 93131 },
{ 2266673, 93639 },
{ 2315680, 94145 },
{ 2365575, 94650 },
{ 2416371, 95154 },
{ 2468077, 95657 },
{ 2520707, 96159 },
{ 2574271, 96660 },
{ 2628782, 97159 },
{ 2684250, 97658 },
{ 2740689, 98155 },
{ 2798110, 98651 },
{ 2856524, 99147 },
{ 2915944, 99641 },
{ 2976382, 100134 },
{ 3037850, 100626 },
{ 3100360, 101117 },
{ 3163924, 101608 },
{ 3228554, 102097 },
{ 3294263, 102586 },
{ 3361063, 103073 },
{ 3428966, 103560 },
{ 3497984, 104045 },
{ 3568131, 104530 },
{ 3639419, 105014 },
{ 3711860, 105498 },
{ 3785467, 105980 },
{ 3860253, 106462 },
{ 3936229, 106942 },
{ 4013410, 107422 },
{ 4091808, 107902 },
{ 4171435, 108380 },
{ 4252306, 108858 },
{ 4334431, 109335 },
{ 4417825, 109811 },
{ 4502501, 110287 },
{ 4588472, 110762 },
{ 4675750, 111236 },
{ 4764349, 111709 },
{ 4854283, 112182 },
{ 4945564, 112654 },
{ 5038206, 113126 },
{ 5132223, 113597 },
{ 5227627, 114067 },
{ 5324432, 114537 },
{ 5422652, 115006 },
{ 5522299, 115474 },
{ 5623389, 115942 },
{ 5725934, 116409 },
{ 5829948, 116876 },
{ 5935446, 117342 },
{ 6042439, 117808 },
{ 6150943, 118273 },
{ 6260972, 118738 },
{ 6372538, 119202 },
{ 6485657, 119665 },
{ 6600342, 120128 },
{ 6716607, 120591 },
{ 6834467, 121053 },
{ 6953935, 121514 },
{ 7075025, 121976 },
{ 7197752, 122436 },
{ 7322131, 122896 },
{ 7448175, 123356 },
{ 7575898, 123815 },
{ 7705316, 124274 },
{ 7836442, 124733 },
{ 7969291, 125191 },
{ 8103877, 125648 },
{ 8240216, 126105 },
{ 8378321, 126562 },
{ 8518208, 127018 },
{ 8659890, 127474 },
{ 8803384, 127930 },
{ 8948702, 128385 },
{ 9095861, 128840 },
{ 9244875, 129294 },
{ 9395760, 129748 },
{ 9548529, 130202 },
{ 9703198, 130655 },
{ 9859782, 131108 },
{ 10018296, 131561 },
{ 10178755, 132014 },
{ 10341174, 132466 },
{ 10505569, 132917 },
{ 10671954, 133369 },
{ 10840345, 133820 },
{ 11010757, 134271 },
{ 11183206, 134721 },
{ 11357706, 135171 },
{ 11534274, 135621 },
{ 11712924, 136071 },
{ 11893673, 136520 },
{ 12076536, 136969 },
{ 12261527, 137418 },
{ 12448664, 137867 },
{ 12637961, 138315 },
{ 12829435, 138763 },
{ 13023101, 139211 },
{ 13218974, 139658 },
{ 13417071, 140106 },
{ 13617407, 140553 },
{ 13819999, 140999 },
{ 14024862, 141446 },
{ 14232012, 141892 },
{ 14441465, 142339 },
{ 14653238, 142785 },
{ 14867346, 143230 },
{ 15083805, 143676 },
{ 15302632, 144121 },
{ 15523842, 144566 },
{ 15747453, 145011 },
{ 15973479, 145456 },
{ 16201939, 145900 },
{ 16432847, 146345 },
{ 16666221, 146789 },
{ 16902076, 147233 },
{ 17140429, 147677 },
{ 17381297, 148121 },
{ 17624696, 148564 },
{ 17870643, 149007 },
{ 18119154, 149451 },
{ 18370247, 149894 },
{ 18623936, 150336 },
{ 18880241, 150779 },
{ 19139176, 151222 },
{ 19400759, 151664 },
{ 19665007, 152107 },
{ 19931936, 152549 },
{ 20201564, 152991 },
{ 20473907, 153433 },
{ 20748982, 153875 },
{ 21026807, 154316 },
{ 21307399, 154758 },
{ 21590773, 155199 },
{ 21876949, 155641 },
{ 22165941, 156082 },
{ 22457769, 156523 },
{ 22752449, 156964 },
{ 23049999, 157405 },
{ 23350435, 157846 },
{ 23653774, 158287 },
{ 23960036, 158727 },
{ 24269236, 159168 },
{ 24581392, 159608 },
{ 24896521, 160049 },
{ 25214642, 160489 },
{ 25535772, 160929 },
{ 25859927, 161370 },
{ 26187127, 161810 },
{ 26517388, 162250 },
{ 26850728, 162690 },
{ 27187165, 163130 },
{ 27526716, 163569 },
{ 27869400, 164009 },
{ 28215234, 164449 },
{ 28564236, 164889 },
{ 28916423, 165328 },
{ 29271815, 165768 },
{ 29630428, 166208 },
{ 29992281, 166647 },
{ 30357392, 167087 },
{ 30725779, 167526 },
{ 31097459, 167965 },
{ 31472452, 168405 },
{ 31850774, 168844 },
{ 32232445, 169283 },
{ 32617482, 169723 },
{ 33005904, 170162 },
{ 33397730, 170601 },
{ 33792976, 171041 },
{ 34191663, 171480 },
{ 34593807, 171919 },
{ 34999428, 172358 },
{ 35408544, 172797 },
{ 35821174, 173237 },
{ 36237335, 173676 },
{ 36657047, 174115 },
{ 37080329, 174554 },
{ 37507197, 174993 },
{ 37937673, 175433 },
{ 38371773, 175872 },
{ 38809517, 176311 },
{ 39250924, 176750 },
{ 39696012, 177190 },
{ 40144800, 177629 },
{ 40597308, 178068 },
{ 41053553, 178507 },
{ 41513554, 178947 },
{ 41977332, 179386 },
{ 42444904, 179825 },
{ 42916290, 180265 },
{ 43391509, 180704 },
{ 43870579, 181144 },
{ 44353520, 181583 },
{ 44840352, 182023 },
{ 45331092, 182462 },
{ 45825761, 182902 },
{ 46324378, 183342 },
{ 46826961, 183781 },
{ 47333531, 184221 },
{ 47844106, 184661 },
{ 48358706, 185101 },
{ 48877350, 185541 },
{ 49400058, 185981 },
{ 49926849, 186421 },
{ 50457743, 186861 },
{ 50992759, 187301 },
{ 51531916, 187741 },
{ 52075235, 188181 },
{ 52622735, 188622 },
{ 53174435, 189062 },
{ 53730355, 189502 },
{ 54290515, 189943 },
{ 54854935, 190383 },
{ 55423634, 190824 },
{ 55996633, 191265 },
{ 56573950, 191706 },
{ 57155606, 192146 },
{ 57741621, 192587 },
{ 58332014, 193028 },
{ 58926806, 193470 },
{ 59526017, 193911 },
{ 60129666, 194352 },
{ 60737774, 194793 },
{ 61350361, 195235 },
{ 61967446, 195677 },
{ 62589050, 196118 },
{ 63215194, 196560 },
{ 63845897, 197002 },
{ 64481179, 197444 },
{ 65121061, 197886 },
{ 65765563, 198328 },
{ 66414705, 198770 },
{ 67068508, 199213 },
{ 67726992, 199655 },
{ 68390177, 200098 },
{ 69058085, 200540 },
{ 69730735, 200983 },
{ 70408147, 201426 },
{ 71090343, 201869 },
{ 71777343, 202312 },
{ 72469168, 202755 },
{ 73165837, 203199 },
{ 73867373, 203642 },
{ 74573795, 204086 },
{ 75285124, 204529 },
{ 76001380, 204973 },
{ 76722586, 205417 },
{ 77448761, 205861 },
{ 78179926, 206306 },
{ 78916102, 206750 },
{ 79657310, 207194 },
{ 80403571, 207639 },
{ 81154906, 208084 },
{ 81911335, 208529 },
{ 82672880, 208974 },
{ 83439562, 209419 },
{ 84211402, 209864 },
{ 84988421, 210309 },
{ 85770640, 210755 },
{ 86558080, 211201 },
{ 87350762, 211647 },
{ 88148708, 212093 },
{ 88951938, 212539 },
{ 89760475, 212985 },
{ 90574339, 213432 },
{ 91393551, 213878 },
{ 92218133, 214325 },
{ 93048107, 214772 },
{ 93883493, 215219 },
{ 94724314, 215666 },
{ 95570590, 216114 },
{ 96422343, 216561 },
{ 97279594, 217009 },
{ 98142366, 217457 },
{ 99010679, 217905 },
{ 99884556, 218353 },
{ 100764018, 218801 },
{ 101649086, 219250 },
{ 102539782, 219698 },
{ 103436128, 220147 },
{ 104338146, 220596 },
{ 105245857, 221046 },
{ 106159284, 221495 },
{ 107078448, 221945 },
{ 108003370, 222394 },
{ 108934074, 222844 },
{ 109870580, 223294 },
{ 110812910, 223745 },
{ 111761087, 224195 },
{ 112715133, 224646 },
{ 113675069, 225097 },
{ 114640918, 225548 },
{ 115612702, 225999 },
{ 116590442, 226450 },
{ 117574162, 226902 },
{ 118563882, 227353 },
{ 119559626, 227805 },
{ 120561415, 228258 },
{ 121569272, 228710 },
{ 122583219, 229162 },
{ 123603278, 229615 },
{ 124629471, 230068 },
{ 125661822, 230521 },
{ 126700352, 230974 },
{ 127745083, 231428 },
{ 128796039, 231882 },
{ 129853241, 232336 },
{ 130916713, 232790 },
{ 131986475, 233244 },
{ 133062553, 233699 },
{ 134144966, 234153 },
{ 135233739, 234608 },
{ 136328894, 235064 },
{ 137430453, 235519 },
{ 138538440, 235975 },
{ 139652876, 236430 },
{ 140773786, 236886 },
{ 141901190, 237343 },
{ 143035113, 237799 },
{ 144175576, 238256 },
{ 145322604, 238713 },
{ 146476218, 239170 },
{ 147636442, 239627 },
{ 148803298, 240085 },
{ 149976809, 240542 },
{ 151156999, 241000 },
{ 152343890, 241459 },
{ 153537506, 241917 },
{ 154737869, 242376 },
{ 155945002, 242835 },
{ 157158929, 243294 },
{ 158379673, 243753 },
{ 159607257, 244213 },
{ 160841704, 244673 },
{ 162083037, 245133 },
{ 163331279, 245593 },
{ 164586455, 246054 },
{ 165848586, 246514 },
{ 167117696, 246975 },
{ 168393810, 247437 },
{ 169676949, 247898 },
{ 170967138, 248360 },
{ 172264399, 248822 },
{ 173568757, 249284 },
{ 174880235, 249747 },
{ 176198856, 250209 },
{ 177524643, 250672 },
{ 178857621, 251136 },
{ 180197813, 251599 },
{ 181545242, 252063 },
{ 182899933, 252527 },
{ 184261908, 252991 },
{ 185631191, 253456 },
{ 187007807, 253920 },
{ 188391778, 254385 },
{ 189783129, 254851 },
{ 191181884, 255316 },
{ 192588065, 255782 },
{ 194001698, 256248 },
{ 195422805, 256714 },
{ 196851411, 257181 },
{ 198287540, 257648 },
{ 199731215, 258115 },
{ 201182461, 258582 },
{ 202641302, 259050 },
{ 204107760, 259518 },
{ 205581862, 259986 },
{ 207063630, 260454 },
{ 208553088, 260923 },
{ 210050262, 261392 },
{ 211555174, 261861 },
{ 213067849, 262331 },
{ 214588312, 262800 },
{ 216116586, 263270 },
{ 217652696, 263741 },
{ 219196666, 264211 },
{ 220748520, 264682 },
{ 222308282, 265153 },
{ 223875978, 265625 },
{ 225451630, 266097 },
{ 227035265, 266569 },
{ 228626905, 267041 },
{ 230226576, 267514 },
{ 231834302, 267986 },
{ 233450107, 268460 },
{ 235074016, 268933 },
{ 236706054, 269407 },
{ 238346244, 269881 },
{ 239994613, 270355 },
{ 241651183, 270830 },
{ 243315981, 271305 }
};
/*
 * Binary search over one column of tfrc_calc_x_lookup.
 *
 * Returns the smallest index i with fval <= lookup[i][small]; if fval is
 * larger than every tabulated value, the last index is returned. The search
 * assumes the selected column is sorted in ascending order.
 */
static inline u32 tfrc_binsearch(u32 fval, u8 small)
{
	u32 lo = 0, hi = TFRC_CALC_X_ARRSIZE - 1;

	while (lo < hi) {
		const u32 mid = (lo + hi) / 2;

		if (tfrc_calc_x_lookup[mid][small] >= fval)
			hi = mid;
		else
			lo = mid + 1;
	}

	return hi;
}
/**
 * tfrc_calc_x  -  Calculate the send rate as per section 3.1 of RFC3448
 *
 * @s: packet size in bytes
 * @R: RTT     scaled by 1000000   (i.e., microseconds)
 * @p: loss ratio estimate scaled by 1000000
 *
 * Returns X_calc in bytes per second (not scaled). A zero RTT is reported
 * and answered with the maximum value ~0U; p == 0 and p > 1000000 are
 * treated as fatal (BUG_ON).
 */
u32 tfrc_calc_x(u16 s, u32 R, u32 p)
{
	u16 row;	/* row of the lookup table that represents p */
	u8  column;	/* 1: fine-grained column, 0: coarse-grained column */
	u32 f;
	u64 result;

	/* reject parameters that are invalid or would divide by zero */
	BUG_ON(p == 0);		/* f(0) = 0, divide by zero    */
	BUG_ON(p > 1000000);	/* p must not exceed 100%      */

	if (R == 0) {		/* possible  divide by zero    */
		DCCP_CRIT("WARNING: RTT is 0, returning maximum X_calc.");
		return ~0U;
	}

	if (p > TFRC_CALC_X_SPLIT) {		/* 0.05   <  p <= 1.00   */
		column = 0;
		row    = p/(1000000/TFRC_CALC_X_ARRSIZE) - 1;
	} else if (p >= TFRC_SMALLEST_P) {	/* 0.0001 <= p <= 0.05   */
		column = 1;
		row    = p/TFRC_SMALLEST_P - 1;
	} else {				/* 0.0000 <  p <  0.0001 */
		DCCP_WARN("Value of p (%d) below resolution. "
			  "Substituting %d\n", p, TFRC_SMALLEST_P);
		column = 1;
		row    = 0;
	}
	f = tfrc_calc_x_lookup[row][column];

	/*
	 * X = s/(R*f(p)) in bytes per second. R and f(p) are each scaled by
	 * 1000000, so the quotient has to be scaled up by 1000000^2. This is
	 * done in two stages to avoid overflow; should some odd combination of
	 * parameters overflow nevertheless, scaled_div32 is expected to catch
	 * it and return UINT_MAX, which is a logically adequate outcome.
	 */
	result = scaled_div(s, R);
	return scaled_div32(result, f);
}
EXPORT_SYMBOL_GPL(tfrc_calc_x);
/**
 * tfrc_calc_x_reverse_lookup  -  try to find p given f(p)
 *
 * @fvalue: function value to match, scaled by 1000000
 *
 * Returns the closest match for p, also scaled by 1000000. Values outside
 * the tabulated range are reported and clamped to the nearest representable
 * p: TFRC_SMALLEST_P at the lower end, 1000000 (100%) at the upper end.
 */
u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
{
	int index;

	if (fvalue == 0)	/* f(p) = 0  whenever  p = 0 */
		return 0;

	/* Error cases. */
	if (fvalue < tfrc_calc_x_lookup[0][1]) {
		DCCP_WARN("fvalue %u smaller than resolution\n", fvalue);
		/*
		 * The result must be a loss ratio, not a table entry (which
		 * is a value of f): answer with the p that row 0 of the
		 * fine-grained column stands for.
		 */
		return TFRC_SMALLEST_P;
	}
	if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) {
		DCCP_WARN("fvalue %u exceeds bounds!\n", fvalue);
		return 1000000;
	}

	/* small values of f(p) live in the fine-grained column */
	if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1]) {
		index = tfrc_binsearch(fvalue, 1);
		return (index + 1) * TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE;
	}

	/* else ... it must be in the coarse-grained column */
	index = tfrc_binsearch(fvalue, 0);
	return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
}
EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);

452
net/dccp/dccp.h Normal file
View File

@@ -0,0 +1,452 @@
#ifndef _DCCP_H
#define _DCCP_H
/*
* net/dccp/dccp.h
*
* An implementation of the DCCP protocol
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
* Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/dccp.h>
#include <net/snmp.h>
#include <net/sock.h>
#include <net/tcp.h>
#include "ackvec.h"
/*
* DCCP - specific warning and debugging macros.
*
* All of them take a printf-style format string followed by its arguments.
*/
/* Warning, prefixed with the calling function's name; emitted through
* LIMIT_NETDEBUG at KERN_WARNING level. */
#define DCCP_WARN(fmt, a...) LIMIT_NETDEBUG(KERN_WARNING "%s: " fmt, \
__FUNCTION__, ##a)
/* Critical message; " at file:line/function()" and a newline are appended,
* so the format string passed in should not end in a newline itself. */
#define DCCP_CRIT(fmt, a...) printk(KERN_CRIT fmt " at %s:%d/%s()\n", ##a, \
__FILE__, __LINE__, __FUNCTION__)
/* DCCP_CRIT message prefixed with "BUG: ", followed by a stack dump.
* Execution continues afterwards. */
#define DCCP_BUG(a...) do { DCCP_CRIT("BUG: " a); dump_stack(); } while(0)
/* Raise DCCP_BUG, quoting the condition text, when @cond is non-zero. */
#define DCCP_BUG_ON(cond) do { if (unlikely((cond) != 0)) \
DCCP_BUG("\"%s\" holds (exception!)", \
__stringify(cond)); \
} while (0)
/* printk that is gated by @enable when MODULE is defined; in the other case
* @enable is ignored and the message is always printed. */
#ifdef MODULE
#define DCCP_PRINTK(enable, fmt, args...) do { if (enable) \
printk(fmt, ##args); \
} while(0)
#else
#define DCCP_PRINTK(enable, fmt, args...) printk(fmt, ##args)
#endif
/* KERN_DEBUG variant of DCCP_PRINTK, prefixed with the function name. */
#define DCCP_PR_DEBUG(enable, fmt, a...) DCCP_PRINTK(enable, KERN_DEBUG \
"%s: " fmt, __FUNCTION__, ##a)
/*
* dccp_pr_debug:     debug message with function-name prefix
* dccp_pr_debug_cat: continuation of a debug message (no prefix, no level)
* Both use dccp_debug as the enable flag and expand to nothing unless
* CONFIG_IP_DCCP_DEBUG is set.
*/
#ifdef CONFIG_IP_DCCP_DEBUG
extern int dccp_debug;
#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a)
#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
#else
#define dccp_pr_debug(format, a...)
#define dccp_pr_debug_cat(format, a...)
#endif
extern struct inet_hashinfo dccp_hashinfo;
extern atomic_t dccp_orphan_count;
extern void dccp_time_wait(struct sock *sk, int state, int timeo);
/*
* Set safe upper bounds for header and option length. Since Data Offset is 8
* bits (RFC 4340, sec. 5.1), the total header length can never be more than
* 4 * 255 = 1020 bytes. The largest possible header length is 28 bytes (X=1):
* - DCCP-Response with ACK Subheader and 4 bytes of Service code OR
* - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields
* Hence a safe upper bound for the maximum option length is 1020-28 = 992
*
* NOTE(review): the factor 4 above is spelled sizeof(int) below, i.e. the
* figures quoted here hold where int is 4 bytes wide.
*/
#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int))
#define DCCP_MAX_PACKET_HDR 28
#define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR)
/* DCCP header bound plus MAX_HEADER (defined outside this file) */
#define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER)
/* The timing constants below are multiples of HZ, i.e. expressed in jiffies */
#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
* state, about 60 seconds */
/* RFC 1122, 4.2.3.1 initial RTO value */
#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
/* Maximal interval between probes for local resources. */
#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
/* sysctl variables for DCCP */
extern int sysctl_dccp_request_retries;
extern int sysctl_dccp_retries1;
extern int sysctl_dccp_retries2;
extern int sysctl_dccp_feat_sequence_window;
extern int sysctl_dccp_feat_rx_ccid;
extern int sysctl_dccp_feat_tx_ccid;
extern int sysctl_dccp_feat_ack_ratio;
extern int sysctl_dccp_feat_send_ack_vector;
extern int sysctl_dccp_feat_send_ndp_count;
extern int sysctl_dccp_tx_qlen;
/*
 * Comparison of 48-bit sequence numbers in circular sequence space.
 *
 * Shifting both operands left by 16 moves bit 47 into the sign position of
 * a 64-bit word, so that the wrap-around at 2^48 is handled by ordinary
 * 64-bit modular subtraction.
 */

/* is seq1 < seq2 ? */
static inline int before48(const u64 seq1, const u64 seq2)
{
	const s64 distance = (s64)((seq1 << 16) - (seq2 << 16));

	return distance < 0;
}

/* is seq1 > seq2 ?  (the mirror image of before48) */
static inline int after48(const u64 seq1, const u64 seq2)
{
	return before48(seq2, seq1);
}

/* is seq2 <= seq1 <= seq3 ? */
static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
{
	const u64 base   = seq2 << 16;
	const u64 span   = (seq3 << 16) - base;	/* width of [seq2, seq3]   */
	const u64 offset = (seq1 << 16) - base;	/* position of seq1 inside */

	return offset <= span;
}

/* the later one of two sequence numbers (seq2 when neither is later) */
static inline u64 max48(const u64 seq1, const u64 seq2)
{
	if (after48(seq1, seq2))
		return seq1;
	return seq2;
}
/*
 * is seq1 next seqno after seq2 ?
 *
 * All 48 bits take part in the comparison: both operands are shifted into
 * the upper 48 bits of a 64-bit word, so that the difference is computed
 * modulo 2^48 and 0 is recognised as the successor of 2^48 - 1.
 */
static inline int follows48(const u64 seq1, const u64 seq2)
{
	return (seq1 << 16) - (seq2 << 16) == ((u64)1 << 16);
}
/*
* Indices of the DCCP statistics counters. __DCCP_MIB_MAX is not a counter:
* being the last enumerator, it equals the number of entries and sizes the
* mibs[] array below.
*/
enum {
DCCP_MIB_NUM = 0,
DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
DCCP_MIB_ESTABRESETS, /* EstabResets */
DCCP_MIB_CURRESTAB, /* CurrEstab */
DCCP_MIB_OUTSEGS, /* OutSegs */
DCCP_MIB_OUTRSTS,
DCCP_MIB_ABORTONTIMEOUT,
DCCP_MIB_TIMEOUTS,
DCCP_MIB_ABORTFAILED,
DCCP_MIB_PASSIVEOPENS,
DCCP_MIB_ATTEMPTFAILS,
DCCP_MIB_OUTDATAGRAMS,
DCCP_MIB_INERRS,
DCCP_MIB_OPTMANDATORYERROR,
DCCP_MIB_INVALIDOPT,
__DCCP_MIB_MAX
};
#define DCCP_MIB_MAX __DCCP_MIB_MAX
/* One counter per DCCP_MIB_* index */
struct dccp_mib {
unsigned long mibs[DCCP_MIB_MAX];
} __SNMP_MIB_ALIGN__;
DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
/*
* Thin wrappers that apply the generic SNMP_*_STATS* macros to
* dccp_statistics; @field is one of the DCCP_MIB_* indices above.
*/
#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field)
#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field)
#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field)
#define DCCP_ADD_STATS_BH(field, val) \
SNMP_ADD_STATS_BH(dccp_statistics, field, val)
#define DCCP_ADD_STATS_USER(field, val) \
SNMP_ADD_STATS_USER(dccp_statistics, field, val)
/*
 * Checksumming routines
 */

/*
 * Number of bytes of @skb covered by the checksum, derived from the CsCov
 * and Data Offset header fields. CsCov == 0 stands for the whole packet.
 */
static inline int dccp_csum_coverage(const struct sk_buff *skb)
{
	const struct dccp_hdr *hdr = dccp_hdr(skb);

	if (hdr->dccph_cscov != 0)
		return (hdr->dccph_doff + hdr->dccph_cscov - 1) * sizeof(u32);
	return skb->len;
}

/*
 * Compute the checksum of an outgoing packet into skb->csum. A coverage
 * that reaches or exceeds the packet length is normalised to CsCov = 0
 * before the checksum is taken.
 */
static inline void dccp_csum_outgoing(struct sk_buff *skb)
{
	const int coverage = dccp_csum_coverage(skb);
	int csum_len = coverage;

	if (coverage >= skb->len)
		dccp_hdr(skb)->dccph_cscov = 0;
	/* never checksum beyond the end of the packet */
	if (coverage > skb->len)
		csum_len = skb->len;

	skb->csum = skb_checksum(skb, 0, csum_len, 0);
}
extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
extern void dccp_send_ack(struct sock *sk);
extern void dccp_send_delayed_ack(struct sock *sk);
extern void dccp_reqsk_send_ack(struct sk_buff *sk, struct request_sock *rsk);
extern void dccp_send_sync(struct sock *sk, const u64 seq,
const enum dccp_pkt_type pkt_type);
extern void dccp_write_xmit(struct sock *sk, int block);
extern void dccp_write_space(struct sock *sk);
extern void dccp_init_xmit_timers(struct sock *sk);
/* Clear the transmit timers of @sk; plain wrapper around
* inet_csk_clear_xmit_timers(). */
static inline void dccp_clear_xmit_timers(struct sock *sk)
{
inet_csk_clear_xmit_timers(sk);
}
extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
extern const char *dccp_packet_name(const int type);
extern const char *dccp_state_name(const int state);
extern void dccp_set_state(struct sock *sk, const int state);
extern void dccp_done(struct sock *sk);
extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb);
extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
extern struct sock *dccp_create_openreq_child(struct sock *sk,
const struct request_sock *req,
const struct sk_buff *skb);
extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
extern struct sock *dccp_v4_request_recv_sock(struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst);
extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct request_sock **prev);
extern int dccp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb);
extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct dccp_hdr *dh, unsigned len);
extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
const struct dccp_hdr *dh, const unsigned len);
extern int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
extern int dccp_destroy_sock(struct sock *sk);
extern void dccp_close(struct sock *sk, long timeout);
extern struct sk_buff *dccp_make_response(struct sock *sk,
struct dst_entry *dst,
struct request_sock *req);
extern int dccp_connect(struct sock *sk);
extern int dccp_disconnect(struct sock *sk, int flags);
extern void dccp_hash(struct sock *sk);
extern void dccp_unhash(struct sock *sk);
extern int dccp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
extern int dccp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen);
#ifdef CONFIG_COMPAT
extern int compat_dccp_getsockopt(struct sock *sk,
int level, int optname,
char __user *optval, int __user *optlen);
extern int compat_dccp_setsockopt(struct sock *sk,
int level, int optname,
char __user *optval, int optlen);
#endif
extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t size);
extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
extern void dccp_shutdown(struct sock *sk, int how);
extern int inet_dccp_listen(struct socket *sock, int backlog);
extern unsigned int dccp_poll(struct file *file, struct socket *sock,
poll_table *wait);
extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len);
extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
extern void dccp_send_close(struct sock *sk, const int active);
extern int dccp_invalid_packet(struct sk_buff *skb);
/*
 * dccp_bad_service_code  -  check a service code against the socket's codes
 * @sk:      socket to check against
 * @service: service code to look for (big endian)
 *
 * Returns 0 if @service equals the socket's own service code or is found in
 * its service list, 1 otherwise.
 */
static inline int dccp_bad_service_code(const struct sock *sk,
					const __be32 service)
{
	const struct dccp_sock *dp = dccp_sk(sk);

	return dp->dccps_service != service &&
	       !dccp_list_has_service(dp->dccps_service_list, service);
}
/*
* Per-packet DCCP data, overlaid on the cb[] area of an sk_buff (see
* DCCP_SKB_CB below).
* NOTE(review): field meanings are inferred from their names, except for
* dccpd_type, which this file compares against the DCCP_PKT_* constants.
*/
struct dccp_skb_cb {
__u8 dccpd_type:4; /* packet type, a DCCP_PKT_* value */
__u8 dccpd_ccval:4;
__u8 dccpd_reset_code;
__u16 dccpd_opt_len;
__u64 dccpd_seq;
__u64 dccpd_ack_seq;
};
/* Access the DCCP control block stored at the start of __skb->cb */
#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
/*
 * Returns 1 if the type recorded in the control block of @skb is one of
 * Ack, Close, CloseReq, Reset, Sync or SyncAck, and 0 for any other type.
 */
static inline int dccp_non_data_packet(const struct sk_buff *skb)
{
	switch (DCCP_SKB_CB(skb)->dccpd_type) {
	case DCCP_PKT_ACK:
	case DCCP_PKT_CLOSE:
	case DCCP_PKT_CLOSEREQ:
	case DCCP_PKT_RESET:
	case DCCP_PKT_SYNC:
	case DCCP_PKT_SYNCACK:
		return 1;
	default:
		return 0;
	}
}

/* Returns 1 for the two packet types tested here, Data and Request. */
static inline int dccp_packet_without_ack(const struct sk_buff *skb)
{
	switch (DCCP_SKB_CB(skb)->dccpd_type) {
	case DCCP_PKT_DATA:
	case DCCP_PKT_REQUEST:
		return 1;
	default:
		return 0;
	}
}
/* Largest valid sequence number: sequence numbers are 48 bits wide */
#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
/* A value above DCCP_MAX_SEQNO, i.e. one that cannot be a valid seqno */
#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)

/*
 * dccp_set_seqno  -  store @value into *@seqno, reduced modulo 2^48
 *
 * The reduction is a mask rather than a single subtraction, so that results
 * of 64-bit arithmetic which wrapped below zero (e.g. "gsr + 1 - W/4" with a
 * small gsr) also end up as valid 48-bit sequence numbers.
 */
static inline void dccp_set_seqno(u64 *seqno, u64 value)
{
	*seqno = value & DCCP_MAX_SEQNO;
}

/* Distance from @seqno1 forward to @seqno2 in 48-bit circular space */
static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
{
	return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
}

/* Advance *@seqno by one, wrapping from DCCP_MAX_SEQNO to 0 */
static inline void dccp_inc_seqno(u64 *seqno)
{
	if (++*seqno > DCCP_MAX_SEQNO)
		*seqno = 0;
}
/*
 * dccp_hdr_set_seq  -  write a 48-bit sequence number into a DCCP header
 * @dh:  generic header; the extension header holding the low 32 bits is
 *       taken to follow it directly in memory
 * @gss: sequence number to write
 *
 * Bits 47..32 go into dccph_seq, bits 31..0 into dccph_seq_low (both in
 * network byte order); dccph_seq2 is cleared.
 */
static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
{
	struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)(dh + 1);

	dh->dccph_seq2 = 0;
	/* the high part is a 16-bit quantity, hence a 16-bit mask */
	dh->dccph_seq = htons((gss >> 32) & 0xffff);
	dhx->dccph_seq_low = htonl(gss & 0xffffffff);
}
/*
 * dccp_hdr_set_ack  -  write an acknowledgement number into the ack subheader
 * @dhack: acknowledgement subheader to fill in
 * @gsr:   sequence number to acknowledge
 *
 * The value is split into the part above bit 31 and the low 32 bits, both
 * stored in network byte order; the reserved field is cleared.
 */
static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack,
				    const u64 gsr)
{
	const u64 high = gsr >> 32;
	const u64 low  = gsr & 0xffffffff;

	dhack->dccph_reserved1   = 0;
	dhack->dccph_ack_nr_high = htons(high);
	dhack->dccph_ack_nr_low  = htonl(low);
}
/*
 * dccp_update_gsr  -  record @seq as GSR and re-derive the sequence window
 *
 * With W being the socket's sequence window:
 *	SWL = GSR + 1 - W/4
 *	SWH = GSR + 3*W/4
 * Both bounds are stored through dccp_set_seqno().
 */
static inline void dccp_update_gsr(struct sock *sk, u64 seq)
{
	const struct dccp_minisock *msk = dccp_msk(sk);
	struct dccp_sock *dsk = dccp_sk(sk);

	dsk->dccps_gsr = seq;

	/* lower bound of the sequence window */
	dccp_set_seqno(&dsk->dccps_swl,
		       dsk->dccps_gsr + 1 - (msk->dccpms_sequence_window / 4));
	/* upper bound of the sequence window */
	dccp_set_seqno(&dsk->dccps_swh,
		       dsk->dccps_gsr + (3 * msk->dccpms_sequence_window) / 4);
}

/*
 * dccp_update_gss  -  record @seq as GSS and re-derive the ack window
 *
 * With W being the socket's sequence window:
 *	AWH = GSS
 *	AWL = GSS - W + 1
 */
static inline void dccp_update_gss(struct sock *sk, u64 seq)
{
	struct dccp_sock *dsk = dccp_sk(sk);

	dsk->dccps_gss = seq;
	dsk->dccps_awh = dsk->dccps_gss;
	dccp_set_seqno(&dsk->dccps_awl,
		       (dsk->dccps_gss -
			dccp_msk(sk)->dccpms_sequence_window + 1));
}
/*
* dccp_ack_pending - is there anything that calls for sending an Ack?
*
* Non-zero if a timestamp echo is recorded on the socket, if an Ack is
* scheduled according to inet_csk_ack_scheduled(), or - only when built with
* CONFIG_IP_DCCP_ACKVEC - if Ack Vectors are enabled on the socket and
* dccp_ackvec_pending() reports one for the receiver.
*/
static inline int dccp_ack_pending(const struct sock *sk)
{
const struct dccp_sock *dp = dccp_sk(sk);
return dp->dccps_timestamp_echo != 0 ||
#ifdef CONFIG_IP_DCCP_ACKVEC
(dccp_msk(sk)->dccpms_send_ack_vector &&
dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
#endif
inet_csk_ack_scheduled(sk);
}
extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
extern int dccp_insert_option_elapsed_time(struct sock *sk,
struct sk_buff *skb,
u32 elapsed_time);
extern int dccp_insert_option_timestamp(struct sock *sk,
struct sk_buff *skb);
extern int dccp_insert_option(struct sock *sk, struct sk_buff *skb,
unsigned char option,
const void *value, unsigned char len);
extern void dccp_timestamp(const struct sock *sk, struct timeval *tv);
/* The time held in @tv, expressed as a single count of microseconds */
static inline suseconds_t timeval_usecs(const struct timeval *tv)
{
	return tv->tv_usec + tv->tv_sec * USEC_PER_SEC;
}

/*
 * timeval_delta  -  difference @large - @small in microseconds
 *
 * The seconds and microseconds parts are subtracted separately; a negative
 * microsecond difference borrows one second before the two are combined.
 */
static inline suseconds_t timeval_delta(const struct timeval *large,
					const struct timeval *small)
{
	time_t	    secs  = large->tv_sec  - small->tv_sec;
	suseconds_t usecs = large->tv_usec - small->tv_usec;

	if (usecs < 0) {
		usecs += USEC_PER_SEC;
		secs  -= 1;
	}
	return usecs + secs * USEC_PER_SEC;
}
/*
 * timeval_add_usecs  -  advance @tv by @usecs microseconds
 *
 * Overflow of the microseconds field is carried into tv_sec, one second
 * per loop iteration.
 */
static inline void timeval_add_usecs(struct timeval *tv,
				     const suseconds_t usecs)
{
	for (tv->tv_usec += usecs; tv->tv_usec >= USEC_PER_SEC;
	     tv->tv_usec -= USEC_PER_SEC)
		tv->tv_sec++;
}

/*
 * timeval_sub_usecs  -  move @tv back by @usecs microseconds
 *
 * A negative microseconds field borrows from tv_sec, one second per loop
 * iteration. Ending up with negative seconds is reported via DCCP_BUG_ON.
 */
static inline void timeval_sub_usecs(struct timeval *tv,
				     const suseconds_t usecs)
{
	for (tv->tv_usec -= usecs; tv->tv_usec < 0;
	     tv->tv_usec += USEC_PER_SEC)
		tv->tv_sec--;

	DCCP_BUG_ON(tv->tv_sec < 0);
}
/*
* sysctl registration: real functions (defined outside this header) when
* CONFIG_SYSCTL is set, otherwise inline stubs, so that callers do not need
* an #ifdef of their own.
*/
#ifdef CONFIG_SYSCTL
extern int dccp_sysctl_init(void);
extern void dccp_sysctl_exit(void);
#else
/* stub: nothing to set up, always reports success (0) */
static inline int dccp_sysctl_init(void)
{
return 0;
}
/* stub: nothing to tear down */
static inline void dccp_sysctl_exit(void)
{
}
#endif
#endif /* _DCCP_H */

70
net/dccp/diag.c Normal file
View File

@@ -0,0 +1,70 @@
/*
* net/dccp/diag.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@mandriva.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/inet_diag.h>
#include "ccid.h"
#include "dccp.h"
/*
 * dccp_get_info  -  describe a DCCP socket in terms of a struct tcp_info
 * @sk:   socket to report on
 * @info: record to fill in; it is zeroed first
 *
 * Copies the socket state and the generic connection-socket counters, flags
 * TCPI_OPT_SACK when Ack Vectors are enabled for the socket, and then hands
 * the record to the RX and TX CCIDs so that they can add their own data.
 */
static void dccp_get_info(struct sock *sk, struct tcp_info *info)
{
	const struct inet_connection_sock *csk = inet_csk(sk);
	struct dccp_sock *dsk = dccp_sk(sk);

	memset(info, 0, sizeof(*info));

	info->tcpi_pmtu		= csk->icsk_pmtu_cookie;
	info->tcpi_backoff	= csk->icsk_backoff;
	info->tcpi_probes	= csk->icsk_probes_out;
	info->tcpi_retransmits	= csk->icsk_retransmits;
	info->tcpi_state	= sk->sk_state;

	if (dccp_msk(sk)->dccpms_send_ack_vector)
		info->tcpi_options |= TCPI_OPT_SACK;

	ccid_hc_rx_get_info(dsk->dccps_hc_rx_ccid, sk, info);
	ccid_hc_tx_get_info(dsk->dccps_hc_tx_ccid, sk, info);
}
/*
 * dccp_diag_get_info  -  idiag_get_info callback of the DCCP diag handler
 * @sk:    socket to report on
 * @r:     diag message; both queue sizes are reported as 0
 * @_info: optional buffer for a struct tcp_info, may be NULL
 */
static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
			       void *_info)
{
	r->idiag_wqueue = 0;
	r->idiag_rqueue = 0;

	if (_info == NULL)
		return;

	dccp_get_info(sk, _info);
}
/*
* inet_diag handler for DCCP: serves DCCPDIAG_GETSOCK requests from
* dccp_hashinfo and reports per-socket details as a struct tcp_info.
*/
static struct inet_diag_handler dccp_diag_handler = {
.idiag_hashinfo = &dccp_hashinfo,
.idiag_get_info = dccp_diag_get_info,
.idiag_type = DCCPDIAG_GETSOCK,
.idiag_info_size = sizeof(struct tcp_info),
};
/* Module entry point: register the handler, passing on the result of
* inet_diag_register(). */
static int __init dccp_diag_init(void)
{
return inet_diag_register(&dccp_diag_handler);
}
/* Module exit point: unregister the handler again. */
static void __exit dccp_diag_fini(void)
{
inet_diag_unregister(&dccp_diag_handler);
}
module_init(dccp_diag_init);
module_exit(dccp_diag_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
MODULE_DESCRIPTION("DCCP inet_diag handler");

644
net/dccp/feat.c Normal file
View File

@@ -0,0 +1,644 @@
/*
* net/dccp/feat.c
*
* An implementation of the DCCP protocol
* Andrea Bittau <a.bittau@cs.ucl.ac.uk>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include "ccid.h"
#include "feat.h"
#define DCCP_FEAT_SP_NOAGREE (-123)
/*
 * dccp_feat_change  -  queue (or replace) a pending feature change
 * @dmsk:    minisock owning the list of pending changes
 * @type:    option type of the change
 * @feature: feature number
 * @val:     kmalloc'ed value(s) to announce
 * @len:     number of bytes in @val
 * @gfp:     allocation flags for a new list entry
 *
 * If a change for the same (@type, @feature) pair is already pending, its
 * value is freed and replaced by @val and its confirmed flag is reset;
 * otherwise a new entry is appended to the pending list.
 *
 * Returns 0 on success, in which case @val has been stored in the list entry
 * and belongs to it. Returns 1 if the type or length check fails and -ENOMEM
 * if no entry could be allocated; @val has not been stored in these cases.
 */
int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
		     u8 *val, u8 len, gfp_t gfp)
{
	struct dccp_opt_pend *pend;

	dccp_feat_debug(type, feature, *val);

	if (!dccp_feat_is_valid_type(type)) {
		DCCP_WARN("option type %d invalid in negotiation\n", type);
		return 1;
	}
	if (!dccp_feat_is_valid_length(type, feature, len)) {
		DCCP_WARN("invalid length %d\n", len);
		return 1;
	}
	/* XXX add further sanity checks */

	/* is this feature already under negotiation? then recycle the entry */
	list_for_each_entry(pend, &dmsk->dccpms_pending, dccpop_node) {
		if (pend->dccpop_type != type || pend->dccpop_feat != feature)
			continue;

		dccp_pr_debug("Replacing old\n");
		BUG_ON(pend->dccpop_val == NULL);
		kfree(pend->dccpop_val);
		pend->dccpop_conf = 0;
		pend->dccpop_len  = len;
		pend->dccpop_val  = val;
		return 0;
	}

	/* first negotiation of this feature: create a fresh entry */
	pend = kmalloc(sizeof(*pend), gfp);
	if (pend == NULL)
		return -ENOMEM;

	pend->dccpop_sc   = NULL;
	pend->dccpop_conf = 0;
	pend->dccpop_val  = val;
	pend->dccpop_len  = len;
	pend->dccpop_feat = feature;
	pend->dccpop_type = type;

	BUG_ON(pend->dccpop_val == NULL);

	list_add_tail(&pend->dccpop_node, &dmsk->dccpms_pending);
	return 0;
}
EXPORT_SYMBOL_GPL(dccp_feat_change);
/*
 * dccp_feat_update_ccid  -  switch one half-connection to another CCID
 * @sk:          socket to operate on
 * @type:        DCCPO_CHANGE_R selects the RX side, anything else the TX side
 * @new_ccid_nr: number of the CCID to switch to
 *
 * Nothing is done when the selected side already runs @new_ccid_nr.
 * Otherwise a new CCID instance is created first, and only then is the old
 * one deleted and replaced, so that a failed allocation leaves the socket
 * untouched.
 *
 * Returns 0 on success and -ENOMEM if the new CCID could not be created.
 */
static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr)
{
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	/* are we changing our own receiver CCID, or the sender one? */
	const int rx = type == DCCPO_CHANGE_R;
	struct ccid *fresh;
	u8 current_nr;

	if (rx)
		current_nr = dmsk->dccpms_rx_ccid;
	else
		current_nr = dmsk->dccpms_tx_ccid;

	if (current_nr == new_ccid_nr)		/* no change at all */
		return 0;

	fresh = ccid_new(new_ccid_nr, sk, rx, GFP_ATOMIC);
	if (fresh == NULL)
		return -ENOMEM;

	if (!rx) {
		ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
		dp->dccps_hc_tx_ccid = fresh;
		dmsk->dccpms_tx_ccid = new_ccid_nr;
		return 0;
	}

	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = fresh;
	dmsk->dccpms_rx_ccid = new_ccid_nr;
	return 0;
}
/*
 * dccp_feat_update  -  apply a negotiated feature value to the socket
 *
 * XXX taking only u8 vals
 *
 * Only DCCPF_CCID is acted upon, in which case the result of
 * dccp_feat_update_ccid() is returned. Every other feature is merely
 * logged as unimplemented and reported as success (0).
 */
static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val)
{
	dccp_feat_debug(type, feat, val);

	if (feat == DCCPF_CCID)
		return dccp_feat_update_ccid(sk, type, val);

	dccp_pr_debug("UNIMPLEMENTED: %s(%d, ...)\n",
		      dccp_feat_typename(type), feat);
	return 0;
}
/*
 * dccp_feat_reconcile  -  settle the value of a server-priority feature
 * @sk:    socket the negotiation belongs to
 * @opt:   our pending Change entry, holding our own preference list
 * @rpref: preference list received from the peer
 * @rlen:  number of bytes in @rpref
 *
 * The server's list is walked in order and the first value that also occurs
 * in the client's list wins. Without a common value the currently active
 * value is reconfirmed (implemented for DCCPF_CCID only). The outcome,
 * followed by our own preference list, is stored in the confirm slot of
 * @opt, and the feature is updated on our side.
 *
 * Returns 0 on agreement, DCCP_FEAT_SP_NOAGREE if the old value had to be
 * reconfirmed, or a negative error code.
 */
static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
			       u8 *rpref, u8 rlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	u8 *spref, slen, *res = NULL;
	int i, j, rc, agree = 1;

	BUG_ON(rpref == NULL);

	/* check if we are the black sheep */
	if (dp->dccps_role == DCCP_ROLE_CLIENT) {
		spref = rpref;
		slen  = rlen;
		rpref = opt->dccpop_val;
		rlen  = opt->dccpop_len;
	} else {
		spref = opt->dccpop_val;
		slen  = opt->dccpop_len;
	}
	/*
	 * Now we have server preference list in spref and client preference in
	 * rpref
	 */
	BUG_ON(spref == NULL);
	BUG_ON(rpref == NULL);

	/* FIXME sanity check vals */

	/* Are values in any order?  XXX Lame "algorithm" here */
	/* XXX assume values are 1 byte */
	for (i = 0; i < slen; i++) {
		for (j = 0; j < rlen; j++) {
			if (spref[i] == rpref[j]) {
				res = &spref[i];
				break;
			}
		}
		if (res)
			break;
	}

	/* we didn't agree on anything */
	if (res == NULL) {
		/* confirm previous value */
		switch (opt->dccpop_feat) {
		case DCCPF_CCID:
			/* XXX did i get this right? =P */
			if (opt->dccpop_type == DCCPO_CHANGE_L)
				res = &dccp_msk(sk)->dccpms_tx_ccid;
			else
				res = &dccp_msk(sk)->dccpms_rx_ccid;
			break;

		default:
			DCCP_BUG("Fell through, feat=%d", opt->dccpop_feat);
			/* XXX implement res */
			return -EFAULT;
		}

		dccp_pr_debug("Don't agree... reconfirming %d\n", *res);
		agree = 0; /* this is used for mandatory options... */
	}

	/*
	 * The confirm carries the result plus our preference list and its
	 * length is kept in a u8: a list of 255 bytes would make the sum wrap
	 * to 0 and the copy below overrun the allocation.
	 */
	if (opt->dccpop_len >= 0xff)
		return -EINVAL;

	/* need to put result and our preference list */
	/* XXX assume 1 byte vals */
	rlen = 1 + opt->dccpop_len;
	rpref = kmalloc(rlen, GFP_ATOMIC);
	if (rpref == NULL)
		return -ENOMEM;

	*rpref = *res;
	memcpy(&rpref[1], opt->dccpop_val, opt->dccpop_len);

	/* put it in the "confirm queue" */
	if (opt->dccpop_sc == NULL) {
		opt->dccpop_sc = kmalloc(sizeof(*opt->dccpop_sc), GFP_ATOMIC);
		if (opt->dccpop_sc == NULL) {
			kfree(rpref);
			return -ENOMEM;
		}
	} else {
		/* recycle the confirm slot */
		BUG_ON(opt->dccpop_sc->dccpoc_val == NULL);
		kfree(opt->dccpop_sc->dccpoc_val);
		dccp_pr_debug("recycling confirm slot\n");
	}
	memset(opt->dccpop_sc, 0, sizeof(*opt->dccpop_sc));

	opt->dccpop_sc->dccpoc_val = rpref;
	opt->dccpop_sc->dccpoc_len = rlen;

	/* update the option on our side [we are about to send the confirm] */
	rc = dccp_feat_update(sk, opt->dccpop_type, opt->dccpop_feat, *res);
	if (rc) {
		kfree(opt->dccpop_sc->dccpoc_val);
		kfree(opt->dccpop_sc);
		opt->dccpop_sc = NULL;
		return rc;
	}

	dccp_pr_debug("Will confirm %d\n", *rpref);

	/* say we want to change to X but we just got a confirm X, suppress our
	 * change
	 */
	if (!opt->dccpop_conf && *opt->dccpop_val == *res) {
		opt->dccpop_conf = 1;
		/* log only when the change request really is suppressed */
		dccp_pr_debug("won't ask for change of same feature\n");
	}

	return agree ? 0 : DCCP_FEAT_SP_NOAGREE; /* used for mandatory opts */
}
/*
 * dccp_feat_sp  -  handle a received Change for a server-priority feature
 * @sk:      socket the option arrived on
 * @type:    type of the received option
 * @feature: feature number
 * @val:     preference list of the peer
 * @len:     number of bytes in @val
 *
 * The peer's list is reconciled against our own pending entry for the same
 * feature. That entry is of the opposite type: a Change R from the peer
 * concerns our side and is matched against our Change L list, and the other
 * way round.
 *
 * Returns the result of dccp_feat_reconcile().
 */
static int dccp_feat_sp(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
{
	struct dccp_minisock *dmsk = dccp_msk(sk);
	const u8 t = (type == DCCPO_CHANGE_L) ? DCCPO_CHANGE_R : DCCPO_CHANGE_L;
	struct dccp_opt_pend *opt;
	int rc = 1;	/* 1 means: no preference list found */

	/* find our preference list for this feature */
	list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
		if (opt->dccpop_type == t && opt->dccpop_feat == feature) {
			/* find the winner from the two preference lists */
			rc = dccp_feat_reconcile(sk, opt, val, len);
			break;
		}
	}

	/* Reaching this point with rc still 1 means that we have no
	 * preference list for the feature. That should never happen: for
	 * every feature we understand there has to be a list holding at
	 * least the default value.
	 */
	BUG_ON(rc == 1);

	return rc;
}
/*
 * dccp_feat_nn  -  handle a received Change for a non-negotiable feature
 * @sk:      socket the option arrived on
 * @type:    type of the received option, has to be DCCPO_CHANGE_L
 * @feature: feature number
 * @val:     value announced by the peer
 * @len:     number of bytes in @val
 *
 * The feature is updated on our side and a Confirm R carrying a private copy
 * of the value is queued on the confirm list.
 *
 * Returns 0 on success, -EFAULT for any option type other than Change L,
 * -ENOMEM on allocation failure, or the error of dccp_feat_update().
 */
static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
{
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct dccp_opt_pend *conf;
	u8 *copy;
	int rc;

	/* NN features must be Change L (sec. 6.3.2) */
	if (type != DCCPO_CHANGE_L) {
		dccp_pr_debug("received %s for NN feature %d\n",
			      dccp_feat_typename(type), feature);
		return -EFAULT;
	}

	/* XXX sanity check opt val */

	/* set up the entry we are going to confirm with */
	conf = kzalloc(sizeof(*conf), GFP_ATOMIC);
	if (conf == NULL)
		return -ENOMEM;

	rc = -ENOMEM;
	copy = kmemdup(val, len, GFP_ATOMIC);
	if (copy == NULL)
		goto free_entry;

	conf->dccpop_len  = len;
	conf->dccpop_val  = copy;
	conf->dccpop_feat = feature;
	conf->dccpop_type = DCCPO_CONFIRM_R; /* NN can only confirm R */

	/* change feature */
	rc = dccp_feat_update(sk, type, feature, *val);
	if (rc)
		goto free_value;

	dccp_feat_debug(type, feature, *copy);
	list_add_tail(&conf->dccpop_node, &dmsk->dccpms_conf);
	return 0;

free_value:
	kfree(copy);
free_entry:
	kfree(conf);
	return rc;
}
/*
 * dccp_feat_empty_confirm  -  queue a Confirm that carries no value
 * @dmsk:    minisock owning the confirm list
 * @type:    type of the Change being answered (Change L or Change R)
 * @feature: feature number
 *
 * A Change L is answered by Confirm R and a Change R by Confirm L; any other
 * type is reported and ignored. Allocation failure is silently ignored too,
 * nothing is queued in that case.
 */
static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk,
				    u8 type, u8 feature)
{
	/* XXX check if other confirms for that are queued and recycle slot */
	struct dccp_opt_pend *opt;
	u8 confirm_type;

	/*
	 * Work out the type of the answer before allocating anything, so
	 * that the error path has nothing to release.
	 */
	switch (type) {
	case DCCPO_CHANGE_L:
		confirm_type = DCCPO_CONFIRM_R;
		break;
	case DCCPO_CHANGE_R:
		confirm_type = DCCPO_CONFIRM_L;
		break;
	default:
		DCCP_WARN("invalid type %d\n", type);
		return;
	}

	opt = kzalloc(sizeof(*opt), GFP_ATOMIC);
	if (opt == NULL) {
		/* XXX what do we do?  Ignoring should be fine.  It's a change
		 * after all =P
		 */
		return;
	}

	opt->dccpop_type = confirm_type;
	opt->dccpop_feat = feature;
	opt->dccpop_val	 = NULL;
	opt->dccpop_len	 = 0;

	/* change feature */
	dccp_pr_debug("Empty %s(%d)\n", dccp_feat_typename(type), feature);

	list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
}
/*
 * dccp_feat_flush_confirm  -  send an Ack if there is something to confirm
 *
 * There is something to confirm when the confirm list is non-empty or when
 * any pending entry has its confirmed flag set. The Ack is only sent while
 * the socket is in state DCCP_OPEN.
 */
static void dccp_feat_flush_confirm(struct sock *sk)
{
	struct dccp_minisock *dmsk = dccp_msk(sk);

	if (list_empty(&dmsk->dccpms_conf)) {
		struct dccp_opt_pend *opt;
		int flagged = 0;

		/* nothing queued: look for a flagged pending entry instead */
		list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
			if (opt->dccpop_conf) {
				flagged = 1;
				break;
			}
		}
		if (!flagged)
			return;
	}

	/* OK there is something to confirm... */

	/* XXX check if packet is in flight?  Send delayed ack?? */
	if (sk->sk_state == DCCP_OPEN)
		dccp_send_ack(sk);
}
/*
 * dccp_feat_change_recv  -  process a received Change L / Change R option
 * @sk:      socket the option arrived on
 * @type:    option type
 * @feature: feature number
 * @val:     value(s) carried by the option
 * @len:     number of bytes in @val
 *
 * DCCPF_CCID is treated as server-priority and DCCPF_ACK_RATIO as
 * non-negotiable; every other feature is rejected with -EFAULT. On failure
 * an empty Confirm is queued, unless the failure is DCCP_FEAT_SP_NOAGREE,
 * where a Confirm for the old value has been prepared already. Finally an
 * Ack is triggered if anything is waiting to be confirmed.
 *
 * Returns the result of the feature handler, so that the caller can act on
 * it in case the option was mandatory.
 */
int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
{
	int rc;

	dccp_feat_debug(type, feature, *val);

	/* figure out if it's SP or NN feature */
	if (feature == DCCPF_CCID) {
		/* deal with SP features */
		rc = dccp_feat_sp(sk, type, feature, val, len);
	} else if (feature == DCCPF_ACK_RATIO) {
		/* deal with NN features */
		rc = dccp_feat_nn(sk, type, feature, val, len);
	} else {
		/* XXX implement other features */
		dccp_pr_debug("UNIMPLEMENTED: not handling %s(%d, ...)\n",
			      dccp_feat_typename(type), feature);
		rc = -EFAULT;
	}

	/* If we don't agree on SP, we sent a confirm for old value.
	 * However we propagate rc to caller in case option was
	 * mandatory
	 */
	if (rc != 0 && rc != DCCP_FEAT_SP_NOAGREE)
		dccp_feat_empty_confirm(dccp_msk(sk), type, feature);

	/* generate the confirm [if required] */
	dccp_feat_flush_confirm(sk);

	return rc;
}
EXPORT_SYMBOL_GPL(dccp_feat_change_recv);
/*
 * dccp_feat_confirm_recv  -  process a received Confirm L / Confirm R option
 * @sk:      socket the option arrived on
 * @type:    option type
 * @feature: feature number
 * @val:     value carried by the option
 * @len:     number of bytes in @val (not used here)
 *
 * Marks the first still unconfirmed Change we have pending for @feature as
 * confirmed and applies the confirmed value. The retransmit timer is cleared
 * once no pending entry is left unconfirmed; the whole list is examined for
 * that, not just the part in front of the matching entry.
 *
 * Returns 1 if @type is not a Confirm option, 0 otherwise (also when the
 * confirmed change had never been requested).
 */
int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
			   u8 *val, u8 len)
{
	u8 t;
	struct dccp_opt_pend *opt;
	struct dccp_minisock *dmsk = dccp_msk(sk);
	int found = 0;
	int all_confirmed = 1;

	dccp_feat_debug(type, feature, *val);

	/* locate our change request */
	switch (type) {
	case DCCPO_CONFIRM_L:
		t = DCCPO_CHANGE_R;
		break;
	case DCCPO_CONFIRM_R:
		t = DCCPO_CHANGE_L;
		break;
	default:
		DCCP_WARN("invalid type %d\n", type);
		return 1;
	}
	/* XXX sanity check feature value */

	list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
		if (!found && !opt->dccpop_conf && opt->dccpop_type == t &&
		    opt->dccpop_feat == feature) {
			found = 1;
			dccp_pr_debug("feature %d found\n", opt->dccpop_feat);

			/* XXX do sanity check */

			opt->dccpop_conf = 1;

			/* We got a confirmation---change the option */
			dccp_feat_update(sk, opt->dccpop_type,
					 opt->dccpop_feat, *val);

			/* XXX check the return value of dccp_feat_update */

			/* keep scanning: later entries may still be open */
			continue;
		}

		if (!opt->dccpop_conf)
			all_confirmed = 0;
	}

	/* fix re-transmit timer */
	/* XXX gotta make sure that no option negotiation occurs during
	 * connection shutdown.  Consider that the CLOSEREQ is sent and timer is
	 * on.  if all options are confirmed it might kill timer which should
	 * remain alive until close is received.
	 */
	if (all_confirmed) {
		dccp_pr_debug("clear feat negotiation timer %p\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
	}

	if (!found)
		dccp_pr_debug("%s(%d, ...) never requested\n",
			      dccp_feat_typename(type), feature);
	return 0;
}
EXPORT_SYMBOL_GPL(dccp_feat_confirm_recv);
/*
 * dccp_feat_clean  -  release all feature negotiation state of a minisock
 *
 * Frees every entry of the pending list (together with its value and, where
 * present, its confirm slot) and every entry of the confirm list, then
 * re-initialises both list heads.
 */
void dccp_feat_clean(struct dccp_minisock *dmsk)
{
	struct dccp_opt_pend *cur, *tmp;

	/* pending changes: each one owns a value and possibly a confirm slot */
	list_for_each_entry_safe(cur, tmp, &dmsk->dccpms_pending,
				 dccpop_node) {
		BUG_ON(cur->dccpop_val == NULL);
		kfree(cur->dccpop_val);

		if (cur->dccpop_sc != NULL) {
			BUG_ON(cur->dccpop_sc->dccpoc_val == NULL);
			kfree(cur->dccpop_sc->dccpoc_val);
			kfree(cur->dccpop_sc);
		}

		kfree(cur);
	}
	INIT_LIST_HEAD(&dmsk->dccpms_pending);

	/* queued confirms: the value is optional (kfree accepts NULL) */
	list_for_each_entry_safe(cur, tmp, &dmsk->dccpms_conf, dccpop_node) {
		BUG_ON(cur == NULL);
		kfree(cur->dccpop_val);
		kfree(cur);
	}
	INIT_LIST_HEAD(&dmsk->dccpms_conf);
}
EXPORT_SYMBOL_GPL(dccp_feat_clean);
/*
 * dccp_feat_clone  -  hand the pending feature changes on to a child socket
 * @oldsk: parent socket
 * @newsk: child socket
 *
 * This is to be called only when a listening sock creates its child, and the
 * function relies on that: the confirm slot of an entry is not duplicated
 * but rather "passed on" to the child, the parent's entry being reset.
 * Values of the entries are duplicated.
 *
 * Returns 0 on success. On allocation failure everything that was attached
 * to @newsk so far is released again and -ENOMEM is returned.
 */
int dccp_feat_clone(struct sock *oldsk, struct sock *newsk)
{
	struct dccp_minisock *olddmsk = dccp_msk(oldsk);
	struct dccp_minisock *newdmsk = dccp_msk(newsk);
	struct dccp_opt_pend *opt;

	INIT_LIST_HEAD(&newdmsk->dccpms_pending);
	INIT_LIST_HEAD(&newdmsk->dccpms_conf);

	list_for_each_entry(opt, &olddmsk->dccpms_pending, dccpop_node) {
		struct dccp_opt_pend *newopt = NULL;
		/* copy the value of the option */
		u8 *val = kmemdup(opt->dccpop_val, opt->dccpop_len, GFP_ATOMIC);

		if (val != NULL)
			newopt = kmemdup(opt, sizeof(*newopt), GFP_ATOMIC);

		if (newopt == NULL) {
			kfree(val);	/* no-op if the first copy failed */
			dccp_feat_clean(newdmsk);
			return -ENOMEM;
		}

		/* insert the option */
		newopt->dccpop_val = val;
		list_add_tail(&newopt->dccpop_node, &newdmsk->dccpms_pending);

		/* XXX what happens with backlogs and multiple connections at
		 * once...
		 */
		/* reset state for a new socket */
		opt->dccpop_conf = 0;
		/* the master socket no longer needs to worry about confirms;
		 * it's not a memleak---new socket has it
		 */
		opt->dccpop_sc = NULL;
	}

	/* XXX not doing anything about the conf queue */

	return 0;
}
EXPORT_SYMBOL_GPL(dccp_feat_clone);
/*
 * __dccp_feat_init  -  queue a change carrying a private copy of @val
 *
 * dccp_feat_change() keeps the buffer it is given, so the @len bytes at
 * @val are duplicated first. The copy is released again if queueing fails.
 *
 * Returns -ENOMEM if the copy cannot be allocated, otherwise the result of
 * dccp_feat_change().
 */
static int __dccp_feat_init(struct dccp_minisock *dmsk, u8 type, u8 feat,
			    u8 *val, u8 len)
{
	u8 *copy = kmemdup(val, len, GFP_KERNEL);
	int rc;

	if (copy == NULL)
		return -ENOMEM;

	rc = dccp_feat_change(dmsk, type, feat, copy, len, GFP_KERNEL);
	if (rc)
		kfree(copy);
	return rc;
}
/*
 * dccp_feat_init  -  set up the initial feature negotiation state
 *
 * Initialises both option lists and queues three changes taken from the
 * current settings of @dmsk: the TX CCID as Change L, the RX CCID as
 * Change R and the Ack Ratio as Change L, each one byte long.
 *
 * Returns 0 on success or the error of the first step that failed; the
 * remaining steps are skipped then.
 */
int dccp_feat_init(struct dccp_minisock *dmsk)
{
	int rc;

	INIT_LIST_HEAD(&dmsk->dccpms_pending);
	INIT_LIST_HEAD(&dmsk->dccpms_conf);

	/* CCID L */
	rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_CCID,
			      &dmsk->dccpms_tx_ccid, 1);
	/* CCID R */
	if (rc == 0)
		rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_R, DCCPF_CCID,
				      &dmsk->dccpms_rx_ccid, 1);
	/* Ack ratio */
	if (rc == 0)
		rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_ACK_RATIO,
				      &dmsk->dccpms_ack_ratio, 1);
	return rc;
}
EXPORT_SYMBOL_GPL(dccp_feat_init);
#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * dccp_feat_typename  -  printable name of a feature negotiation option type
 *
 * Returns a string constant for the four negotiation option types. Any
 * other type must not appear in feature negotiation: it is logged and NULL
 * is returned.
 */
const char *dccp_feat_typename(const u8 type)
{
	if (type == DCCPO_CHANGE_L)
		return("ChangeL");
	if (type == DCCPO_CONFIRM_L)
		return("ConfirmL");
	if (type == DCCPO_CHANGE_R)
		return("ChangeR");
	if (type == DCCPO_CONFIRM_R)
		return("ConfirmR");

	dccp_pr_debug("unknown type %d [BUG!]\n", type);
	return NULL;
}
EXPORT_SYMBOL_GPL(dccp_feat_typename);
/*
 * Map a feature number to a printable name.
 *
 * Feature numbers at or above DCCPF_MIN_CCID_SPECIFIC are reported as
 * "CCID-specific"; numbers that dccp_feat_is_reserved() flags share the
 * "Reserved" entry; everything else indexes the table directly.
 */
const char *dccp_feat_name(const u8 feat)
{
	static const char *feature_names[] = {
		[DCCPF_RESERVED]	= "Reserved",
		[DCCPF_CCID]		= "CCID",
		[DCCPF_SHORT_SEQNOS]	= "Allow Short Seqnos",
		[DCCPF_SEQUENCE_WINDOW]	= "Sequence Window",
		[DCCPF_ECN_INCAPABLE]	= "ECN Incapable",
		[DCCPF_ACK_RATIO]	= "Ack Ratio",
		[DCCPF_SEND_ACK_VECTOR]	= "Send ACK Vector",
		[DCCPF_SEND_NDP_COUNT]	= "Send NDP Count",
		[DCCPF_MIN_CSUM_COVER]	= "Min. Csum Coverage",
		[DCCPF_DATA_CHECKSUM]	= "Send Data Checksum",
	};

	if (feat >= DCCPF_MIN_CCID_SPECIFIC)
		return "CCID-specific";

	/* reserved numbers fall outside the table, so fold them onto slot 0 */
	return feature_names[dccp_feat_is_reserved(feat) ? DCCPF_RESERVED
							  : feat];
}
EXPORT_SYMBOL_GPL(dccp_feat_name);
#endif /* CONFIG_IP_DCCP_DEBUG */

66
net/dccp/feat.h Normal file
View File

@@ -0,0 +1,66 @@
#ifndef _DCCP_FEAT_H
#define _DCCP_FEAT_H
/*
* net/dccp/feat.h
*
* An implementation of the DCCP protocol
* Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/types.h>
#include "dccp.h"
/*
 * Check a feature option length against the minimum for its type:
 * 4 for Change L/R (RFC 4340, sec. 6.2) and 3 for everything else
 * (sec. 6.1, Confirm).
 *
 * Returns 1 when @len is BELOW that minimum and 0 otherwise.  @feature
 * is currently unused.
 *
 * NOTE(review): despite the "is_valid" name, a non-zero result means
 * the length is too short -- confirm that callers expect this sense.
 */
static inline int dccp_feat_is_valid_length(u8 type, u8 feature, u8 len)
{
	const int is_change = (type == DCCPO_CHANGE_L ||
			       type == DCCPO_CHANGE_R);
	const u8 min_len = is_change ? 4 : 3;

	/* XXX: add per-feature length validation (sec. 6.6.8) */
	return len < min_len;
}
/*
 * Non-zero if @feat is DCCPF_RESERVED itself or lies strictly between
 * DCCPF_DATA_CHECKSUM and DCCPF_MIN_CCID_SPECIFIC.
 */
static inline int dccp_feat_is_reserved(const u8 feat)
{
	if (feat == DCCPF_RESERVED)
		return 1;

	return feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC;
}
/*
 * Feature negotiation knows only four option types (RFC 4340, sec. 6).
 * Non-zero if @optnum lies in the range DCCPO_CHANGE_L..DCCPO_CONFIRM_R
 * inclusive.
 */
static inline int dccp_feat_is_valid_type(const u8 optnum)
{
	if (optnum < DCCPO_CHANGE_L)
		return 0;

	return optnum <= DCCPO_CONFIRM_R;
}
#ifdef CONFIG_IP_DCCP_DEBUG
extern const char *dccp_feat_typename(const u8 type);
extern const char *dccp_feat_name(const u8 feat);
/*
 * Emit one debug line describing a feature option: the option type
 * name, the feature name, the numeric feature and the value @val.
 */
static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val)
{
dccp_pr_debug("%s(%s (%d), %d)\n", dccp_feat_typename(type),
dccp_feat_name(feat), feat, val);
}
#else
#define dccp_feat_debug(type, feat, val)
#endif /* CONFIG_IP_DCCP_DEBUG */
extern int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
u8 *val, u8 len, gfp_t gfp);
extern int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature,
u8 *val, u8 len);
extern int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
u8 *val, u8 len);
extern void dccp_feat_clean(struct dccp_minisock *dmsk);
extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
extern int dccp_feat_init(struct dccp_minisock *dmsk);
#endif /* _DCCP_FEAT_H */

577
net/dccp/input.c Normal file
View File

@@ -0,0 +1,577 @@
/*
* net/dccp/input.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/dccp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include "ackvec.h"
#include "ccid.h"
#include "dccp.h"
/*
 * Mark the receive side of @sk as shut down and queue @skb, with its
 * DCCP header stripped, on the receive queue so that a reader is woken
 * up.  Ownership of @skb passes to the socket.
 */
static void dccp_fin(struct sock *sk, struct sk_buff *skb)
{
	/* dccph_doff counts 32-bit words */
	const unsigned int hdr_bytes = dccp_hdr(skb)->dccph_doff * 4;

	sk->sk_shutdown |= RCV_SHUTDOWN;
	sock_set_flag(sk, SOCK_DONE);

	__skb_pull(skb, hdr_bytes);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	skb_set_owner_r(skb, sk);

	sk->sk_data_ready(sk, 0);
}
/*
 * Handle a received Close packet: answer with a Reset (code "Closed"),
 * queue @skb via dccp_fin() so readers see the end of data, move the
 * socket to DCCP_CLOSED and signal POLL_HUP to async waiters.
 * @skb is consumed (queued on the socket by dccp_fin()).
 */
static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
{
dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
dccp_fin(sk, skb);
dccp_set_state(sk, DCCP_CLOSED);
sk_wake_async(sk, 1, POLL_HUP);
}
/*
 * Handle a received CloseReq packet.  @skb is not consumed here.
 *
 * Step 7: Check for unexpected packet types
 *    If (S.is_server and P.type == CloseReq)
 *      Send Sync packet acknowledging P.seqno
 *      Drop packet and return
 *
 * A client instead enters DCCP_CLOSING (if not already there) and
 * sends a Close.
 */
static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
{
	if (dccp_sk(sk)->dccps_role == DCCP_ROLE_CLIENT) {
		if (sk->sk_state != DCCP_CLOSING)
			dccp_set_state(sk, DCCP_CLOSING);
		dccp_send_close(sk, 0);
	} else {
		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
	}
}
/*
 * Pass the acknowledgement number carried by @skb to the receiver-side
 * Ack Vector, but only while the Send Ack Vector feature is enabled on
 * this socket.
 */
static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
{
	if (!dccp_msk(sk)->dccpms_send_ack_vector)
		return;

	dccp_ackvec_check_rcv_ackno(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
				    DCCP_SKB_CB(skb)->dccpd_ack_seq);
}
static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
struct dccp_sock *dp = dccp_sk(sk);
u64 lswl, lawl;
/*
* Step 5: Prepare sequence numbers for Sync
* If P.type == Sync or P.type == SyncAck,
* If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL,
* / * P is valid, so update sequence number variables
* accordingly. After this update, P will pass the tests
* in Step 6. A SyncAck is generated if necessary in
* Step 15 * /
* Update S.GSR, S.SWL, S.SWH
* Otherwise,
* Drop packet and return
*/
if (dh->dccph_type == DCCP_PKT_SYNC ||
dh->dccph_type == DCCP_PKT_SYNCACK) {
if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
dp->dccps_awl, dp->dccps_awh) &&
!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
else
return -1;
}
/*
* Step 6: Check sequence numbers
* Let LSWL = S.SWL and LAWL = S.AWL
* If P.type == CloseReq or P.type == Close or P.type == Reset,
* LSWL := S.GSR + 1, LAWL := S.GAR
* If LSWL <= P.seqno <= S.SWH
* and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH),
* Update S.GSR, S.SWL, S.SWH
* If P.type != Sync,
* Update S.GAR
* Otherwise,
* Send Sync packet acknowledging P.seqno
* Drop packet and return
*/
lswl = dp->dccps_swl;
lawl = dp->dccps_awl;
if (dh->dccph_type == DCCP_PKT_CLOSEREQ ||
dh->dccph_type == DCCP_PKT_CLOSE ||
dh->dccph_type == DCCP_PKT_RESET) {
lswl = dp->dccps_gsr;
dccp_inc_seqno(&lswl);
lawl = dp->dccps_gar;
}
if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) &&
(DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ ||
between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
lawl, dp->dccps_awh))) {
dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
if (dh->dccph_type != DCCP_PKT_SYNC &&
(DCCP_SKB_CB(skb)->dccpd_ack_seq !=
DCCP_PKT_WITHOUT_ACK_SEQ))
dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
} else {
DCCP_WARN("DCCP: Step 6 failed for %s packet, "
"(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
"(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
"sending SYNC...\n", dccp_packet_name(dh->dccph_type),
(unsigned long long) lswl,
(unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq,
(unsigned long long) dp->dccps_swh,
(DCCP_SKB_CB(skb)->dccpd_ack_seq ==
DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists",
(unsigned long long) lawl,
(unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq,
(unsigned long long) dp->dccps_awh);
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
return -1;
}
return 0;
}
/*
 * Per-packet-type processing for a socket whose sequence numbers and
 * options have already been checked by the caller.
 *
 * @skb is always consumed: it is either queued on the socket (Data,
 * DataAck, Reset, Close) or freed.  Always returns 0.  @len is unused.
 */
static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
				  const struct dccp_hdr *dh, const unsigned len)
{
	struct dccp_sock *dp = dccp_sk(sk);

	switch (dccp_hdr(skb)->dccph_type) {
	case DCCP_PKT_DATAACK:
	case DCCP_PKT_DATA:
		/*
		 * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED
		 * option if it is.
		 */
		__skb_pull(skb, dh->dccph_doff * 4);
		__skb_queue_tail(&sk->sk_receive_queue, skb);
		skb_set_owner_r(skb, sk);
		sk->sk_data_ready(sk, 0);
		return 0;

	case DCCP_PKT_ACK:
		goto discard;

	case DCCP_PKT_RESET:
		/*
		 *  Step 9: Process Reset
		 *	If P.type == Reset,
		 *		Tear down connection
		 *		S.state := TIMEWAIT
		 *		Set TIMEWAIT timer
		 *		Drop packet and return
		 */
		dccp_fin(sk, skb);
		dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
		return 0;

	case DCCP_PKT_CLOSEREQ:
		dccp_rcv_closereq(sk, skb);
		goto discard;

	case DCCP_PKT_CLOSE:
		dccp_rcv_close(sk, skb);
		return 0;

	case DCCP_PKT_REQUEST:
		/* Step 7
		 *   or (S.is_server and P.type == Response)
		 *   or (S.is_client and P.type == Request)
		 *   or (S.state >= OPEN and P.type == Request
		 *	and P.seqno >= S.OSR)
		 *    or (S.state >= OPEN and P.type == Response
		 *	and P.seqno >= S.OSR)
		 *    or (S.state == RESPOND and P.type == Data),
		 *  Send Sync packet acknowledging P.seqno
		 *  Drop packet and return
		 */
		if (dp->dccps_role != DCCP_ROLE_LISTEN ||
		    !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr))
			dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
				       DCCP_PKT_SYNC);
		break;

	case DCCP_PKT_RESPONSE:
		/* same rule as for Request, with the client role check */
		if (dp->dccps_role != DCCP_ROLE_CLIENT ||
		    !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr))
			dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
				       DCCP_PKT_SYNC);
		break;

	case DCCP_PKT_SYNC:
		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
			       DCCP_PKT_SYNCACK);
		/*
		 * From RFC 4340, sec. 5.7
		 *
		 * As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets
		 * MAY have non-zero-length application data areas, whose
		 * contents receivers MUST ignore.
		 */
		goto discard;
	}

	/* Request, Response and any unlisted type count as input errors */
	DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
discard:
	__kfree_skb(skb);
	return 0;
}
/*
 * Receive path entry point used for established connections: validate
 * sequence numbers, parse options, record the packet in the Ack Vector
 * (when that feature is on), notify both CCID halves and then hand over
 * to __dccp_rcv_established().
 *
 * @skb is always consumed.  Always returns 0.
 */
int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
			 const struct dccp_hdr *dh, const unsigned len)
{
	struct dccp_sock *dp = dccp_sk(sk);

	/* short-circuit keeps the original order: seqno check, then options */
	if (dccp_check_seqno(sk, skb) || dccp_parse_options(sk, skb))
		goto drop;

	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
		dccp_event_ack_recv(sk, skb);

	if (dccp_msk(sk)->dccpms_send_ack_vector &&
	    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
			    DCCP_SKB_CB(skb)->dccpd_seq,
			    DCCP_ACKVEC_STATE_RECEIVED))
		goto drop;

	ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
	ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);

	return __dccp_rcv_established(sk, skb, dh, len);

drop:
	__kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(dccp_rcv_established);
/*
 * Process a packet received while the socket is in REQUEST state.
 *
 * Return values, as consumed by dccp_rcv_state_process():
 *    0 - valid Response, no Ack sent now; @skb has been freed here
 *   -1 - valid Response, Ack sent; the caller frees @skb
 *    1 - invalid packet; dccpd_reset_code is set so that the caller
 *        can send a Reset, @skb is left to the caller
 *
 * The Response is fully validated (ackno window, options, Ack Vector)
 * BEFORE the REQUEST retransmit timer is stopped and the pending
 * Request skb is released.  Doing it the other way round lets a single
 * bogus Response cancel retransmission of the Request, and a later
 * Response would then hand a NULL sk_send_head to __kfree_skb().
 */
static int dccp_rcv_request_sent_state_process(struct sock *sk,
					       struct sk_buff *skb,
					       const struct dccp_hdr *dh,
					       const unsigned len)
{
	/*
	 *  Step 4: Prepare sequence numbers in REQUEST
	 *     If S.state == REQUEST,
	 *	  If (P.type == Response or P.type == Reset)
	 *		and S.AWL <= P.ackno <= S.AWH,
	 *	     / * Set sequence number variables corresponding to the
	 *		other endpoint, so P will pass the tests in Step 6 * /
	 *	     Set S.GSR, S.ISR, S.SWL, S.SWH
	 *	     / * Response processing continues in Step 10; Reset
	 *		processing continues in Step 9 * /
	 */
	if (dh->dccph_type == DCCP_PKT_RESPONSE) {
		const struct inet_connection_sock *icsk = inet_csk(sk);
		struct dccp_sock *dp = dccp_sk(sk);

		if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
			       dp->dccps_awl, dp->dccps_awh)) {
			dccp_pr_debug("invalid ackno: S.AWL=%llu, "
				      "P.ackno=%llu, S.AWH=%llu \n",
				      (unsigned long long)dp->dccps_awl,
			   (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
				      (unsigned long long)dp->dccps_awh);
			goto out_invalid_packet;
		}

		/*
		 * dccp_parse_options() has already stored the reset code
		 * (Option Error) in the skb; return directly so that it is
		 * not overwritten with Packet Error below.
		 */
		if (dccp_parse_options(sk, skb))
			return 1;

		if (dccp_msk(sk)->dccpms_send_ack_vector &&
		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
				    DCCP_SKB_CB(skb)->dccpd_seq,
				    DCCP_ACKVEC_STATE_RECEIVED))
			goto out_invalid_packet; /* FIXME: change error code */

		/* The Response is valid: stop the REQUEST timer */
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
		BUG_TRAP(sk->sk_send_head != NULL);
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;

		dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
		dccp_update_gsr(sk, dp->dccps_isr);
		/*
		 * SWL and AWL are initially adjusted so that they are not less than
		 * the initial Sequence Numbers received and sent, respectively:
		 *	SWL := max(GSR + 1 - floor(W/4), ISR),
		 *	AWL := max(GSS - W' + 1, ISS).
		 * These adjustments MUST be applied only at the beginning of the
		 * connection.
		 *
		 * AWL was adjusted in dccp_v4_connect -acme
		 */
		dccp_set_seqno(&dp->dccps_swl,
			       max48(dp->dccps_swl, dp->dccps_isr));

		dccp_sync_mss(sk, icsk->icsk_pmtu_cookie);

		/*
		 *    Step 10: Process REQUEST state (second part)
		 *       If S.state == REQUEST,
		 *	  / * If we get here, P is a valid Response from the
		 *	      server (see Step 4), and we should move to
		 *	      PARTOPEN state. PARTOPEN means send an Ack,
		 *	      don't send Data packets, retransmit Acks
		 *	      periodically, and always include any Init Cookie
		 *	      from the Response * /
		 *	  S.state := PARTOPEN
		 *	  Set PARTOPEN timer
		 *	  Continue with S.state == PARTOPEN
		 *	  / * Step 12 will send the Ack completing the
		 *	      three-way handshake * /
		 */
		dccp_set_state(sk, DCCP_PARTOPEN);

		/* Make sure socket is routed, for correct metrics. */
		icsk->icsk_af_ops->rebuild_header(sk);

		if (!sock_flag(sk, SOCK_DEAD)) {
			sk->sk_state_change(sk);
			sk_wake_async(sk, 0, POLL_OUT);
		}

		if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
		    icsk->icsk_accept_queue.rskq_defer_accept) {
			/* Save one ACK. Data will be ready after
			 * several ticks, if write_pending is set.
			 *
			 * It may be deleted, but with this feature tcpdumps
			 * look so _wonderfully_ clever, that I was not able
			 * to stand against the temptation 8)     --ANK
			 */
			/*
			 * OK, in DCCP we can as well do a similar trick, its
			 * even in the draft, but there is no need for us to
			 * schedule an ack here, as dccp_sendmsg does this for
			 * us, also stated in the draft. -acme
			 */
			__kfree_skb(skb);
			return 0;
		}
		dccp_send_ack(sk);
		return -1;
	}

out_invalid_packet:
	/* dccp_v4_do_rcv will send a reset */
	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
	return 1;
}
/*
 * Packet handling for sockets in RESPOND or PARTOPEN state.
 *
 * An Ack, DataAck or (outside RESPOND) Data packet moves the socket to
 * OPEN; packets carrying data are then passed on to
 * __dccp_rcv_established(), which consumes @skb.
 *
 * Returns 1 if @skb was consumed that way, 0 if the caller still owns
 * it.
 */
static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
						   struct sk_buff *skb,
						   const struct dccp_hdr *dh,
						   const unsigned len)
{
	/* sampled up front: the skb may be gone once it has been queued */
	const int type = dh->dccph_type;

	if (type == DCCP_PKT_RESET) {
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		return 0;
	}

	if (type != DCCP_PKT_DATA && type != DCCP_PKT_DATAACK &&
	    type != DCCP_PKT_ACK)
		return 0;

	/* pure Data does not complete the handshake on the server side */
	if (type == DCCP_PKT_DATA && sk->sk_state == DCCP_RESPOND)
		return 0;

	/*
	 * FIXME: we should be resetting the PARTOPEN (DELACK) timer
	 * here but only if we haven't used the DELACK timer for
	 * something else, like sending a delayed ack for a TIMESTAMP
	 * echo, etc, for now were not clearing it, sending an extra
	 * ACK when there is nothing else to do in DELACK is not a big
	 * deal after all.
	 */

	/* Stop the PARTOPEN timer */
	if (sk->sk_state == DCCP_PARTOPEN)
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);

	dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
	dccp_set_state(sk, DCCP_OPEN);

	if (type == DCCP_PKT_ACK)
		return 0;

	/* Data / DataAck: packet is queued by __dccp_rcv_established() */
	__dccp_rcv_established(sk, skb, dh, len);
	return 1;
}
/*
 * Receive processing for sockets that are not handled by
 * dccp_rcv_established(): LISTEN, REQUESTING, RESPOND, PARTOPEN, CLOSED
 * and any remaining state.
 *
 * Returns 0 when @skb has been consumed (queued or freed) and 1 when the
 * packet was rejected; in the paths visible here that set it,
 * dccpd_reset_code tells the caller which Reset to send.  On a return of
 * 1 the skb is not freed by this function.
 */
int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct dccp_hdr *dh, unsigned len)
{
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
/* remembered so that the PARTOPEN -> OPEN wake-up below can be detected */
const int old_state = sk->sk_state;
int queued = 0;
/*
* Step 3: Process LISTEN state
*
* If S.state == LISTEN,
* If P.type == Request or P contains a valid Init Cookie option,
* (* Must scan the packet's options to check for Init
* Cookies. Only Init Cookies are processed here,
* however; other options are processed in Step 8. This
* scan need only be performed if the endpoint uses Init
* Cookies *)
* (* Generate a new socket and switch to that socket *)
* Set S := new socket for this port pair
* S.state = RESPOND
* Choose S.ISS (initial seqno) or set from Init Cookies
* Initialize S.GAR := S.ISS
* Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init
* Cookies Continue with S.state == RESPOND
* (* A Response packet will be generated in Step 11 *)
* Otherwise,
* Generate Reset(No Connection) unless P.type == Reset
* Drop packet and return
*/
if (sk->sk_state == DCCP_LISTEN) {
if (dh->dccph_type == DCCP_PKT_REQUEST) {
if (inet_csk(sk)->icsk_af_ops->conn_request(sk,
skb) < 0)
return 1;
/* FIXME: do congestion control initialization */
goto discard;
}
if (dh->dccph_type == DCCP_PKT_RESET)
goto discard;
/* Caller (dccp_v4_do_rcv) will send Reset */
dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
return 1;
}
/*
 * In REQUESTING state the sequence/option checks are done by
 * dccp_rcv_request_sent_state_process() instead (see the switch below).
 */
if (sk->sk_state != DCCP_REQUESTING) {
if (dccp_check_seqno(sk, skb))
goto discard;
/*
* Step 8: Process options and mark acknowledgeable
*/
if (dccp_parse_options(sk, skb))
goto discard;
if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
if (dccp_msk(sk)->dccpms_send_ack_vector &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_ACKVEC_STATE_RECEIVED))
goto discard;
ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
}
/*
* Step 9: Process Reset
* If P.type == Reset,
* Tear down connection
* S.state := TIMEWAIT
* Set TIMEWAIT timer
* Drop packet and return
*/
if (dh->dccph_type == DCCP_PKT_RESET) {
/*
* Queue the equivalent of TCP fin so that dccp_recvmsg
* exits the loop
*/
dccp_fin(sk, skb);
dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
return 0;
/*
* Step 7: Check for unexpected packet types
* If (S.is_server and P.type == CloseReq)
* or (S.is_server and P.type == Response)
* or (S.is_client and P.type == Request)
* or (S.state == RESPOND and P.type == Data),
* Send Sync packet acknowledging P.seqno
* Drop packet and return
*/
} else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
(dh->dccph_type == DCCP_PKT_RESPONSE ||
dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
(dp->dccps_role == DCCP_ROLE_CLIENT &&
dh->dccph_type == DCCP_PKT_REQUEST) ||
(sk->sk_state == DCCP_RESPOND &&
dh->dccph_type == DCCP_PKT_DATA)) {
dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
goto discard;
} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
dccp_rcv_closereq(sk, skb);
goto discard;
} else if (dh->dccph_type == DCCP_PKT_CLOSE) {
/* dccp_rcv_close() queues the skb, so it must not be freed here */
dccp_rcv_close(sk, skb);
return 0;
}
/* A Sync is answered with a SyncAck regardless of the state below */
if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) {
dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK);
goto discard;
}
switch (sk->sk_state) {
case DCCP_CLOSED:
dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
return 1;
case DCCP_REQUESTING:
/* FIXME: do congestion control initialization */
queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
/* 0 or 1 is passed straight up; -1 means "free the skb, return 0" */
if (queued >= 0)
return queued;
__kfree_skb(skb);
return 0;
case DCCP_RESPOND:
case DCCP_PARTOPEN:
queued = dccp_rcv_respond_partopen_state_process(sk, skb,
dh, len);
break;
}
/* An Ack/DataAck received in PARTOPEN wakes up writers */
if (dh->dccph_type == DCCP_PKT_ACK ||
dh->dccph_type == DCCP_PKT_DATAACK) {
switch (old_state) {
case DCCP_PARTOPEN:
sk->sk_state_change(sk);
sk_wake_async(sk, 0, POLL_OUT);
break;
}
}
/* free the skb unless a state handler above already consumed it */
if (!queued) {
discard:
__kfree_skb(skb);
}
return 0;
}
EXPORT_SYMBOL_GPL(dccp_rcv_state_process);

1076
net/dccp/ipv4.c Normal file

File diff suppressed because it is too large Load Diff

1269
net/dccp/ipv6.c Normal file

File diff suppressed because it is too large Load Diff

36
net/dccp/ipv6.h Normal file
View File

@@ -0,0 +1,36 @@
#ifndef _DCCP_IPV6_H
#define _DCCP_IPV6_H
/*
* net/dccp/ipv6.h
*
* An implementation of the DCCP protocol
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/dccp.h>
#include <linux/ipv6.h>
/* A DCCP socket with the IPv6 per-socket state appended to it. */
struct dccp6_sock {
struct dccp_sock dccp;
/*
* ipv6_pinfo has to be the last member of dccp6_sock,
* see inet6_sk_generic.
*/
struct ipv6_pinfo inet6;
};
/* A DCCP request sock followed by its IPv6 request-sock part. */
struct dccp6_request_sock {
struct dccp_request_sock dccp;
struct inet6_request_sock inet6;
};
/* An inet timewait sock followed by its IPv6 timewait part. */
struct dccp6_timewait_sock {
struct inet_timewait_sock inet;
struct inet6_timewait_sock tw6;
};
#endif /* _DCCP_IPV6_H */

302
net/dccp/minisocks.c Normal file
View File

@@ -0,0 +1,302 @@
/*
* net/dccp/minisocks.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/dccp.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <net/inet_timewait_sock.h>
#include "ackvec.h"
#include "ccid.h"
#include "dccp.h"
#include "feat.h"
/*
 * Time-wait bookkeeping for DCCP.  dccp_time_wait() checks tw_count
 * against sysctl_max_tw_buckets and passes this structure to
 * inet_twsk_schedule().
 */
struct inet_timewait_death_row dccp_death_row = {
.sysctl_max_tw_buckets = NR_FILE * 2,
/* the time-wait length divided by the number of twkill slots */
.period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
.death_lock = SPIN_LOCK_UNLOCKED,
.hashinfo = &dccp_hashinfo,
.tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
(unsigned long)&dccp_death_row),
.twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work,
inet_twdr_twkill_work),
/* Short-time timewait calendar */
.twcal_hand = -1,
.twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
(unsigned long)&dccp_death_row),
};
EXPORT_SYMBOL_GPL(dccp_death_row);
/*
 * Replace @sk by a time-wait socket in state @state and then finish
 * the original socket with dccp_done().
 *
 * For DCCP_TIME_WAIT the timeout is always DCCP_TIMEWAIT_LEN; for any
 * other state it is @timeo, raised to 3.5 * RTO if smaller.  If no
 * time-wait socket can be obtained (bucket limit reached or allocation
 * failure) a warning is logged and the socket is simply closed.
 */
void dccp_time_wait(struct sock *sk, int state, int timeo)
{
	struct inet_timewait_sock *twsk = NULL;

	if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets)
		twsk = inet_twsk_alloc(sk, state);

	if (twsk == NULL) {
		/* Sorry, if we're out of memory, just CLOSE this
		 * socket up.  We've got bigger problems than
		 * non-graceful socket closings.
		 */
		DCCP_WARN("time wait bucket table overflow\n");
	} else {
		const struct inet_connection_sock *icsk = inet_csk(sk);
		/* 4 * RTO - RTO / 2 */
		const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		if (twsk->tw_family == PF_INET6) {
			const struct ipv6_pinfo *np = inet6_sk(sk);
			struct inet6_timewait_sock *tw6;

			twsk->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
			tw6 = inet6_twsk((struct sock *)twsk);
			ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
			ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
			twsk->tw_ipv6only = np->ipv6only;
		}
#endif
		/* Linkage updates. */
		__inet_twsk_hashdance(twsk, sk, &dccp_hashinfo);

		/* Get the TIME_WAIT timeout firing. */
		twsk->tw_timeout = DCCP_TIMEWAIT_LEN;

		if (state == DCCP_TIME_WAIT)
			timeo = DCCP_TIMEWAIT_LEN;
		else if (timeo < rto)
			timeo = rto;

		inet_twsk_schedule(twsk, &dccp_death_row, timeo,
				   DCCP_TIMEWAIT_LEN);
		inet_twsk_put(twsk);
	}

	dccp_done(sk);
}
/*
 * Create the child socket for a connection request on listening
 * socket @sk.
 *
 * The child is cloned from @sk, gets its own copy of the pending
 * feature options, an Ack Vector (if that feature is enabled) and fresh
 * RX/TX CCID instances, and has its sequence number state seeded from
 * @req.  @skb is unused.
 *
 * Returns the new socket or NULL on allocation failure.  On failure
 * everything allocated so far is released; in particular the feature
 * options cloned by dccp_feat_clone() are freed with dccp_feat_clean(),
 * because the socket destructor is disabled before sk_free() and would
 * otherwise never get the chance to free them.
 */
struct sock *dccp_create_openreq_child(struct sock *sk,
				       const struct request_sock *req,
				       const struct sk_buff *skb)
{
	/*
	 * Step 3: Process LISTEN state
	 *
	 *   (* Generate a new socket and switch to that socket *)
	 *   Set S := new socket for this port pair
	 */
	struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
	const struct dccp_request_sock *dreq = dccp_rsk(req);
	struct inet_connection_sock *newicsk;
	struct dccp_minisock *newdmsk;
	struct dccp_sock *newdp;

	if (newsk == NULL)
		return NULL;

	newicsk = inet_csk(newsk);
	newdp	= dccp_sk(newsk);
	newdmsk = dccp_msk(newsk);

	newdp->dccps_role	   = DCCP_ROLE_SERVER;
	newdp->dccps_hc_rx_ackvec  = NULL;
	newdp->dccps_service_list  = NULL;
	newdp->dccps_service	   = dreq->dreq_service;
	newicsk->icsk_rto	   = DCCP_TIMEOUT_INIT;
	do_gettimeofday(&newdp->dccps_epoch);

	/* dccp_feat_clone() cleans up after itself when it fails */
	if (dccp_feat_clone(sk, newsk))
		goto out_free;

	if (newdmsk->dccpms_send_ack_vector) {
		newdp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_ATOMIC);
		if (unlikely(newdp->dccps_hc_rx_ackvec == NULL))
			goto out_free_feat;
	}

	newdp->dccps_hc_rx_ccid = ccid_hc_rx_new(newdmsk->dccpms_rx_ccid,
						 newsk, GFP_ATOMIC);
	newdp->dccps_hc_tx_ccid = ccid_hc_tx_new(newdmsk->dccpms_tx_ccid,
						 newsk, GFP_ATOMIC);
	if (unlikely(newdp->dccps_hc_rx_ccid == NULL ||
		     newdp->dccps_hc_tx_ccid == NULL))
		goto out_free_ccid;

	/*
	 * Step 3: Process LISTEN state
	 *
	 *    Choose S.ISS (initial seqno) or set from Init Cookies
	 *    Initialize S.GAR := S.ISS
	 *    Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
	 */

	/* See dccp_v4_conn_request */
	newdmsk->dccpms_sequence_window = req->rcv_wnd;

	newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr;
	dccp_update_gsr(newsk, dreq->dreq_isr);

	newdp->dccps_iss = dreq->dreq_iss;
	dccp_update_gss(newsk, dreq->dreq_iss);

	/*
	 * SWL and AWL are initially adjusted so that they are not less than
	 * the initial Sequence Numbers received and sent, respectively:
	 *	SWL := max(GSR + 1 - floor(W/4), ISR),
	 *	AWL := max(GSS - W' + 1, ISS).
	 * These adjustments MUST be applied only at the beginning of the
	 * connection.
	 */
	dccp_set_seqno(&newdp->dccps_swl,
		       max48(newdp->dccps_swl, newdp->dccps_isr));
	dccp_set_seqno(&newdp->dccps_awl,
		       max48(newdp->dccps_awl, newdp->dccps_iss));

	dccp_init_xmit_timers(newsk);

	DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);

	return newsk;

out_free_ccid:
	dccp_ackvec_free(newdp->dccps_hc_rx_ackvec);
	ccid_hc_rx_delete(newdp->dccps_hc_rx_ccid, newsk);
	ccid_hc_tx_delete(newdp->dccps_hc_tx_ccid, newsk);
out_free_feat:
	/* release the feature options cloned for the child */
	dccp_feat_clean(newdmsk);
out_free:
	/* It is still raw copy of parent, so invalidate
	 * destructor and make plain sk_free() */
	newsk->sk_destruct = NULL;
	sk_free(newsk);
	return NULL;
}
EXPORT_SYMBOL_GPL(dccp_create_openreq_child);
/*
* Process an incoming packet for RESPOND sockets represented
* as an request_sock.
*/
struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct request_sock **prev)
{
struct sock *child = NULL;
/* Check for retransmitted REQUEST */
if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
struct dccp_request_sock *dreq = dccp_rsk(req);
if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) {
dccp_pr_debug("Retransmitted REQUEST\n");
dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
/*
* Send another RESPONSE packet
* To protect against Request floods, increment retrans
* counter (backoff, monitored by dccp_response_timer).
*/
req->retrans++;
req->rsk_ops->rtx_syn_ack(sk, req, NULL);
}
/* Network Duplicate, discard packet */
return NULL;
}
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK &&
dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK)
goto drop;
/* Invalid ACK */
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) {
dccp_pr_debug("Invalid ACK number: ack_seq=%llu, "
"dreq_iss=%llu\n",
(unsigned long long)
DCCP_SKB_CB(skb)->dccpd_ack_seq,
(unsigned long long)
dccp_rsk(req)->dreq_iss);
goto drop;
}
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
if (child == NULL)
goto listen_overflow;
/* FIXME: deal with options */
inet_csk_reqsk_queue_unlink(sk, req, prev);
inet_csk_reqsk_queue_removed(sk, req);
inet_csk_reqsk_queue_add(sk, req, child);
out:
return child;
listen_overflow:
dccp_pr_debug("listen_overflow!\n");
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
req->rsk_ops->send_reset(sk, skb);
inet_csk_reqsk_queue_drop(sk, req, prev);
goto out;
}
EXPORT_SYMBOL_GPL(dccp_check_req);
/*
 * Deliver @skb to the freshly created @child socket.
 *
 * If the child is not locked by a user context the packet is processed
 * right away, and @parent is notified when the child thereby leaves
 * RESPOND state.  Otherwise the packet is put on the child's backlog.
 * In both cases the child is unlocked and the caller's reference to it
 * is dropped.
 *
 * Returns the result of dccp_rcv_state_process(), or 0 if the packet
 * was backlogged.
 */
int dccp_child_process(struct sock *parent, struct sock *child,
		       struct sk_buff *skb)
{
	const int old_state = child->sk_state;
	int rc = 0;

	if (sock_owned_by_user(child)) {
		/* Alas, it is possible again, because we do lookup
		 * in main socket hash table and lock on listening
		 * socket does not protect us more.
		 */
		sk_add_backlog(child, skb);
	} else {
		rc = dccp_rcv_state_process(child, skb, dccp_hdr(skb),
					    skb->len);

		/* Wakeup parent, send SIGIO */
		if (old_state == DCCP_RESPOND && child->sk_state != old_state)
			parent->sk_data_ready(parent, 0);
	}

	bh_unlock_sock(child);
	sock_put(child);
	return rc;
}
EXPORT_SYMBOL_GPL(dccp_child_process);
/*
 * Request-sock "send ack" hook.  It only reports a bug via DCCP_BUG(),
 * since (per the message below) no Ack is sent in LISTEN/RESPOND state.
 */
void dccp_reqsk_send_ack(struct sk_buff *skb, struct request_sock *rsk)
{
DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state");
}
EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack);
void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb)
{
inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
inet_rsk(req)->acked = 0;
req->rcv_wnd = sysctl_dccp_feat_sequence_window;
}
EXPORT_SYMBOL_GPL(dccp_reqsk_init);

582
net/dccp/options.c Normal file
View File

@@ -0,0 +1,582 @@
/*
* net/dccp/options.c
*
* An implementation of the DCCP protocol
* Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
* Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include "ackvec.h"
#include "ccid.h"
#include "dccp.h"
#include "feat.h"
/*
 * Default feature values.  dccp_minisock_init() copies them into every
 * minisock it initialises; presumably they are exposed as sysctls
 * elsewhere (not visible in this file section).
 */
int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID;
int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID;
int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO;
int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT;
EXPORT_SYMBOL_GPL(sysctl_dccp_feat_sequence_window);
/*
 * Seed the feature values of @dmsk from the current
 * sysctl_dccp_feat_* defaults.
 */
void dccp_minisock_init(struct dccp_minisock *dmsk)
{
	/* congestion control selection */
	dmsk->dccpms_tx_ccid	       = sysctl_dccp_feat_tx_ccid;
	dmsk->dccpms_rx_ccid	       = sysctl_dccp_feat_rx_ccid;

	/* sequence number and acknowledgement behaviour */
	dmsk->dccpms_sequence_window   = sysctl_dccp_feat_sequence_window;
	dmsk->dccpms_ack_ratio	       = sysctl_dccp_feat_ack_ratio;
	dmsk->dccpms_send_ack_vector   = sysctl_dccp_feat_send_ack_vector;
	dmsk->dccpms_send_ndp_count    = sysctl_dccp_feat_send_ndp_count;
}
static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
{
u32 value = 0;
if (len > 3)
value += *bf++ << 24;
if (len > 2)
value += *bf++ << 16;
if (len > 1)
value += *bf++ << 8;
if (len > 0)
value += *bf;
return value;
}
int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
const struct dccp_hdr *dh = dccp_hdr(skb);
const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
unsigned char *opt_ptr = options;
const unsigned char *opt_end = (unsigned char *)dh +
(dh->dccph_doff * 4);
struct dccp_options_received *opt_recv = &dp->dccps_options_received;
unsigned char opt, len;
unsigned char *value;
u32 elapsed_time;
int rc;
int mandatory = 0;
memset(opt_recv, 0, sizeof(*opt_recv));
opt = len = 0;
while (opt_ptr != opt_end) {
opt = *opt_ptr++;
len = 0;
value = NULL;
/* Check if this isn't a single byte option */
if (opt > DCCPO_MAX_RESERVED) {
if (opt_ptr == opt_end)
goto out_invalid_option;
len = *opt_ptr++;
if (len < 3)
goto out_invalid_option;
/*
* Remove the type and len fields, leaving
* just the value size
*/
len -= 2;
value = opt_ptr;
opt_ptr += len;
if (opt_ptr > opt_end)
goto out_invalid_option;
}
switch (opt) {
case DCCPO_PADDING:
break;
case DCCPO_MANDATORY:
if (mandatory)
goto out_invalid_option;
if (pkt_type != DCCP_PKT_DATA)
mandatory = 1;
break;
case DCCPO_NDP_COUNT:
if (len > 3)
goto out_invalid_option;
opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
dccp_pr_debug("%s rx opt: NDP count=%d\n", dccp_role(sk),
opt_recv->dccpor_ndp);
break;
case DCCPO_CHANGE_L:
/* fall through */
case DCCPO_CHANGE_R:
if (len < 2)
goto out_invalid_option;
rc = dccp_feat_change_recv(sk, opt, *value, value + 1,
len - 1);
/*
* When there is a change error, change_recv is
* responsible for dealing with it. i.e. reply with an
* empty confirm.
* If the change was mandatory, then we need to die.
*/
if (rc && mandatory)
goto out_invalid_option;
break;
case DCCPO_CONFIRM_L:
/* fall through */
case DCCPO_CONFIRM_R:
if (len < 2)
goto out_invalid_option;
if (dccp_feat_confirm_recv(sk, opt, *value,
value + 1, len - 1))
goto out_invalid_option;
break;
case DCCPO_ACK_VECTOR_0:
case DCCPO_ACK_VECTOR_1:
if (pkt_type == DCCP_PKT_DATA)
break;
if (dccp_msk(sk)->dccpms_send_ack_vector &&
dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
goto out_invalid_option;
break;
case DCCPO_TIMESTAMP:
if (len != 4)
goto out_invalid_option;
opt_recv->dccpor_timestamp = ntohl(*(__be32 *)value);
dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
dccp_timestamp(sk, &dp->dccps_timestamp_time);
dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n",
dccp_role(sk), opt_recv->dccpor_timestamp,
(unsigned long long)
DCCP_SKB_CB(skb)->dccpd_ack_seq);
break;
case DCCPO_TIMESTAMP_ECHO:
if (len != 4 && len != 6 && len != 8)
goto out_invalid_option;
opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value);
dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, "
"ackno=%llu, ", dccp_role(sk),
opt_recv->dccpor_timestamp_echo,
len + 2,
(unsigned long long)
DCCP_SKB_CB(skb)->dccpd_ack_seq);
if (len == 4)
break;
if (len == 6)
elapsed_time = ntohs(*(__be16 *)(value + 4));
else
elapsed_time = ntohl(*(__be32 *)(value + 4));
/* Give precedence to the biggest ELAPSED_TIME */
if (elapsed_time > opt_recv->dccpor_elapsed_time)
opt_recv->dccpor_elapsed_time = elapsed_time;
break;
case DCCPO_ELAPSED_TIME:
if (len != 2 && len != 4)
goto out_invalid_option;
if (pkt_type == DCCP_PKT_DATA)
continue;
if (len == 2)
elapsed_time = ntohs(*(__be16 *)value);
else
elapsed_time = ntohl(*(__be32 *)value);
if (elapsed_time > opt_recv->dccpor_elapsed_time)
opt_recv->dccpor_elapsed_time = elapsed_time;
dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
dccp_role(sk), elapsed_time);
break;
/*
* From RFC 4340, sec. 10.3:
*
* Option numbers 128 through 191 are for
* options sent from the HC-Sender to the
* HC-Receiver; option numbers 192 through 255
* are for options sent from the HC-Receiver to
* the HC-Sender.
*/
case 128 ... 191: {
const u16 idx = value - options;
if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
opt, len, idx,
value) != 0)
goto out_invalid_option;
}
break;
case 192 ... 255: {
const u16 idx = value - options;
if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
opt, len, idx,
value) != 0)
goto out_invalid_option;
}
break;
default:
DCCP_CRIT("DCCP(%p): option %d(len=%d) not "
"implemented, ignoring", sk, opt, len);
break;
}
if (opt != DCCPO_MANDATORY)
mandatory = 0;
}
/* mandatory was the last byte in option list -> reset connection */
if (mandatory)
goto out_invalid_option;
return 0;
out_invalid_option:
DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len);
return -1;
}
EXPORT_SYMBOL_GPL(dccp_parse_options);
/*
 * Store the low-order bytes of @value at @to, most significant byte first.
 *
 * @value: number to encode
 * @to:    destination buffer; must have room for min(@len, 4) bytes
 * @len:   number of bytes requested; values above 4 behave like 4 and a
 *         value of 0 writes nothing
 */
static void dccp_encode_value_var(const u32 value, unsigned char *to,
				  const unsigned int len)
{
	unsigned int remaining = len > 4 ? 4 : len;

	/* After the post-decrement, 'remaining' is the index of the byte to emit. */
	while (remaining-- > 0)
		*to++ = (value >> (8 * remaining)) & 0xFF;
}
/*
 * Number of bytes used to encode an NDP count: 1 for values up to 0xFF
 * (the expected case), 2 for values up to 0xFFFF, and 3 for anything larger.
 */
static inline int dccp_ndp_len(const int ndp)
{
	if (likely(ndp <= 0xFF))
		return 1;
	if (ndp <= 0xFFFF)
		return 2;
	return 3;
}
/*
 * dccp_insert_option - prepend a generic type/length/value option to @skb
 * @sk:     socket (not used by this routine)
 * @skb:    packet under construction; the option is pushed in front of it
 * @option: option type byte
 * @value:  option payload, @len bytes
 * @len:    payload length, excluding the two-byte type/length header
 *
 * Returns 0 on success, or -1 without touching @skb when the option would
 * push the accumulated option length past DCCP_MAX_OPT_LEN.
 */
int dccp_insert_option(struct sock *sk, struct sk_buff *skb,
		       const unsigned char option,
		       const void *value, const unsigned char len)
{
	const int opt_len = len + 2;	/* type + length + payload */
	unsigned char *opt;

	if (DCCP_SKB_CB(skb)->dccpd_opt_len + opt_len > DCCP_MAX_OPT_LEN)
		return -1;

	DCCP_SKB_CB(skb)->dccpd_opt_len += opt_len;

	opt = skb_push(skb, opt_len);
	opt[0] = option;
	opt[1] = opt_len;
	memcpy(opt + 2, value, len);

	return 0;
}
EXPORT_SYMBOL_GPL(dccp_insert_option);
/*
 * Prepend an NDP Count option carrying the socket's current
 * dccps_ndp_count, if that count is positive.
 *
 * The socket's counter is updated before the option is written: it is
 * incremented when @skb is a non-data packet and reset to zero otherwise.
 * The option therefore carries the value from before this packet.
 *
 * Returns 0 on success (including when no option was needed), or -1 when
 * the option does not fit within DCCP_MAX_OPT_LEN. The counter update has
 * already happened in that case.
 */
static int dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
	const int ndp = dp->dccps_ndp_count;
	int value_len, opt_len;
	unsigned char *opt;

	if (dccp_non_data_packet(skb))
		++dp->dccps_ndp_count;
	else
		dp->dccps_ndp_count = 0;

	if (ndp <= 0)
		return 0;

	value_len = dccp_ndp_len(ndp);
	opt_len = value_len + 2;

	if (DCCP_SKB_CB(skb)->dccpd_opt_len + opt_len > DCCP_MAX_OPT_LEN)
		return -1;

	DCCP_SKB_CB(skb)->dccpd_opt_len += opt_len;

	opt = skb_push(skb, opt_len);
	opt[0] = DCCPO_NDP_COUNT;
	opt[1] = opt_len;
	dccp_encode_value_var(ndp, opt + 2, value_len);

	return 0;
}
/*
 * Number of bytes used to encode an elapsed-time value: 0 when the value is
 * zero (nothing is sent), 2 when it fits in 16 bits, 4 otherwise.
 */
static inline int dccp_elapsed_time_len(const u32 elapsed_time)
{
	if (elapsed_time == 0)
		return 0;
	if (elapsed_time <= 0xFFFF)
		return 2;
	return 4;
}
/*
 * dccp_insert_option_elapsed_time - prepend an Elapsed Time option
 * @sk:           socket (not used by this routine)
 * @skb:          packet under construction
 * @elapsed_time: value to send; 0 means no option is emitted
 *
 * The value is written in network byte order, using 2 bytes when it fits in
 * 16 bits and 4 bytes otherwise.
 *
 * Returns 0 on success or when nothing had to be inserted, and -1 when the
 * option does not fit within DCCP_MAX_OPT_LEN.
 */
int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb,
				    u32 elapsed_time)
{
	const int value_len = dccp_elapsed_time_len(elapsed_time);
	const int opt_len = value_len + 2;
	unsigned char *opt;

	if (value_len == 0)
		return 0;

	if (DCCP_SKB_CB(skb)->dccpd_opt_len + opt_len > DCCP_MAX_OPT_LEN)
		return -1;

	DCCP_SKB_CB(skb)->dccpd_opt_len += opt_len;

	opt = skb_push(skb, opt_len);
	opt[0] = DCCPO_ELAPSED_TIME;
	opt[1] = opt_len;

	if (value_len == 2) {
		const __be16 short_val = htons((u16)elapsed_time);

		memcpy(opt + 2, &short_val, 2);
	} else {
		const __be32 long_val = htonl(elapsed_time);

		memcpy(opt + 2, &long_val, 4);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
/*
 * dccp_timestamp - time elapsed since the socket's epoch
 * @sk: socket whose dccps_epoch is the reference point
 * @tv: output; receives the current time of day minus dccps_epoch
 *
 * The microsecond field is normalised so that it is never negative.
 */
void dccp_timestamp(const struct sock *sk, struct timeval *tv)
{
	const struct dccp_sock *dp = dccp_sk(sk);

	do_gettimeofday(tv);

	tv->tv_usec -= dp->dccps_epoch.tv_usec;
	tv->tv_sec  -= dp->dccps_epoch.tv_sec;

	/* Borrow whole seconds until the microsecond part is non-negative. */
	for (; tv->tv_usec < 0; tv->tv_usec += USEC_PER_SEC)
		tv->tv_sec--;
}
EXPORT_SYMBOL_GPL(dccp_timestamp);
int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
{
struct timeval tv;
__be32 now;
dccp_timestamp(sk, &tv);
now = htonl(timeval_usecs(&tv) / 10);
/* yes this will overflow but that is the point as we want a
* 10 usec 32 bit timer which mean it wraps every 11.9 hours */
return dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now));
}
EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
static int dccp_insert_option_timestamp_echo(struct sock *sk,
struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct timeval now;
__be32 tstamp_echo;
u32 elapsed_time;
int len, elapsed_time_len;
unsigned char *to;
dccp_timestamp(sk, &now);
elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10;
elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
len = 6 + elapsed_time_len;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
return -1;
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
to = skb_push(skb, len);
*to++ = DCCPO_TIMESTAMP_ECHO;
*to++ = len;
tstamp_echo = htonl(dp->dccps_timestamp_echo);
memcpy(to, &tstamp_echo, 4);
to += 4;
if (elapsed_time_len == 2) {
const __be16 var16 = htons((u16)elapsed_time);
memcpy(to, &var16, 2);
} else if (elapsed_time_len == 4) {
const __be32 var32 = htonl(elapsed_time);
memcpy(to, &var32, 4);
}
dp->dccps_timestamp_echo = 0;
dp->dccps_timestamp_time.tv_sec = 0;
dp->dccps_timestamp_time.tv_usec = 0;
return 0;
}
/*
 * Prepend one feature-negotiation option to @skb.
 *
 * @skb:  packet under construction
 * @type: option type byte
 * @feat: feature number, written as the first payload byte
 * @val:  feature value bytes; not read when @len is 0
 * @len:  number of bytes in @val
 *
 * Returns 0 on success, or -1 (after logging a warning) when the option does
 * not fit within DCCP_MAX_OPT_LEN.
 */
static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
				u8 *val, u8 len)
{
	const int opt_len = len + 3;	/* type + length + feature + value */
	u8 *opt;

	if (DCCP_SKB_CB(skb)->dccpd_opt_len + opt_len > DCCP_MAX_OPT_LEN) {
		DCCP_WARN("packet too small for feature %d option!\n", feat);
		return -1;
	}

	DCCP_SKB_CB(skb)->dccpd_opt_len += opt_len;

	opt = skb_push(skb, opt_len);
	opt[0] = type;
	opt[1] = opt_len;
	opt[2] = feat;
	if (len != 0)
		memcpy(opt + 3, val, len);

	dccp_pr_debug("%s(%s (%d), ...), length %d\n",
		      dccp_feat_typename(type),
		      dccp_feat_name(feat), feat, len);

	return 0;
}
/*
 * Append the socket's pending feature-negotiation options to skb.
 *
 * Two lists on the minisock are processed:
 *  - dccpms_conf: every queued entry is emitted and then freed, and the list
 *    head is re-initialised afterwards;
 *  - dccpms_pending: for each entry, a stored confirmation (dccpop_sc) is
 *    emitted and freed, and the entry itself is (re)sent as long as
 *    dccpop_conf is not set.
 *
 * If at least one unconfirmed pending option was sent and the socket is not
 * in the LISTEN role, the retransmit timer is (re)armed.
 *
 * Always returns 0. NOTE(review): the return values of
 * dccp_insert_feat_opt() are ignored, so an option that does not fit is
 * dropped silently (and, for dccpms_conf entries, still freed).
 */
static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_minisock *dmsk = dccp_msk(sk);
struct dccp_opt_pend *opt, *next;
int change = 0;
/* confirm any options [NN opts] */
list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) {
dccp_insert_feat_opt(skb, opt->dccpop_type,
opt->dccpop_feat, opt->dccpop_val,
opt->dccpop_len);
/* fear empty confirms */
if (opt->dccpop_val)
kfree(opt->dccpop_val);
kfree(opt);
}
/* Every entry was freed above without unlinking; reset the list head. */
INIT_LIST_HEAD(&dmsk->dccpms_conf);
/* see which features we need to send */
list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
/* see if we need to send any confirm */
if (opt->dccpop_sc) {
/* presumably dccpop_type + 1 is the matching Confirm option type -- TODO confirm */
dccp_insert_feat_opt(skb, opt->dccpop_type + 1,
opt->dccpop_feat,
opt->dccpop_sc->dccpoc_val,
opt->dccpop_sc->dccpoc_len);
/* NOTE(review): this check runs after dccpoc_val was already passed on above */
BUG_ON(!opt->dccpop_sc->dccpoc_val);
kfree(opt->dccpop_sc->dccpoc_val);
kfree(opt->dccpop_sc);
opt->dccpop_sc = NULL;
}
/* any option not confirmed, re-send it */
if (!opt->dccpop_conf) {
dccp_insert_feat_opt(skb, opt->dccpop_type,
opt->dccpop_feat, opt->dccpop_val,
opt->dccpop_len);
change++;
}
}
/* Retransmit timer.
* If this is the master listening sock, we don't set a timer on it. It
* should be fine because if the peer doesn't receive our RESPONSE
* [which will contain the CHANGE] it will send another REQUEST which
* will "retransmit" the change.
*/
if (change && dp->dccps_role != DCCP_ROLE_LISTEN) {
dccp_pr_debug("reset feat negotiation timer %p\n", sk);
/* XXX don't reset the timer on re-transmissions. I.e. reset it
* only when sending new stuff i guess. Currently the timer
* never backs off because on re-transmission it just resets it!
*/
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto, DCCP_RTO_MAX);
}
return 0;
}
/*
 * dccp_insert_options - build the option area of an outgoing packet
 * @sk:  socket the packet belongs to
 * @skb: packet under construction; dccpd_type must already be set
 *
 * Resets the packet's option length to 0, then inserts, in this order:
 * NDP Count (if enabled), and -- for packets that carry an acknowledgement --
 * Ack Vector and Timestamp Echo, followed by CCID receiver options and
 * feature-negotiation options (not on DATA/DATAACK packets). Finally the
 * option area is zero-padded to a multiple of 4 bytes.
 *
 * Each option is pushed in front of the existing skb data.
 *
 * Returns 0 on success and -1 as soon as one of the checked insertions fails.
 */
int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_minisock *dmsk = dccp_msk(sk);
DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
if (dmsk->dccpms_send_ndp_count &&
dccp_insert_option_ndp(sk, skb))
return -1;
if (!dccp_packet_without_ack(skb)) {
if (dmsk->dccpms_send_ack_vector &&
dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
dccp_insert_option_ackvec(sk, skb))
return -1;
/* A non-zero dccps_timestamp_echo means there is a timestamp to echo. */
if (dp->dccps_timestamp_echo != 0 &&
dccp_insert_option_timestamp_echo(sk, skb))
return -1;
}
if (dp->dccps_hc_rx_insert_options) {
if (ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb))
return -1;
/* The request flag is cleared only after a successful insertion. */
dp->dccps_hc_rx_insert_options = 0;
}
/* Feature negotiation */
/* Data packets can't do feat negotiation */
if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA &&
DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATAACK &&
dccp_insert_options_feat(sk, skb))
return -1;
/* XXX: insert other options when appropriate */
if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
/* The length of all options has to be a multiple of 4 */
int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;
if (padding != 0) {
padding = 4 - padding;
memset(skb_push(skb, padding), 0, padding);
DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
}
}
return 0;
}

560
net/dccp/output.c Normal file
View File

@@ -0,0 +1,560 @@
/*
* net/dccp/output.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/dccp.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/inet_sock.h>
#include <net/sock.h>
#include "ackvec.h"
#include "ccid.h"
#include "dccp.h"
/*
 * Clear the ICSK_TIME_DACK transmit timer on sk. Called by
 * dccp_transmit_skb() after sending a packet that carries an
 * acknowledgement number.
 */
static inline void dccp_event_ack_sent(struct sock *sk)
{
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
/*
 * Charge skb to sk as write memory and remember it in sk->sk_send_head.
 * Warns if sk_send_head is already occupied, since the previous pointer is
 * overwritten.
 */
static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
{
skb_set_owner_w(skb, sk);
WARN_ON(sk->sk_send_head);
sk->sk_send_head = skb;
}
/*
 * All SKB's seen here are completely headerless. It is our
 * job to build the DCCP header, and pass the packet down to
 * IP so it can do the same plus pass the packet off to the
 * device.
 *
 * The socket's GSS is advanced and used as the packet's sequence number.
 * The acknowledgement number is GSR, except for REQUEST/SYNC/SYNCACK where
 * the value the caller stored in dccpd_seq is used instead. No
 * acknowledgement number is written for DATA and REQUEST packets.
 *
 * Returns -ENOBUFS when skb is NULL, -EPROTO (after freeing skb) when the
 * options cannot be inserted, otherwise net_xmit_eval() of the queue_xmit
 * result.
 */
static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
if (likely(skb != NULL)) {
const struct inet_sock *inet = inet_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
struct dccp_hdr *dh;
/* XXX For now we're using only 48 bits sequence numbers */
const u32 dccp_header_size = sizeof(*dh) +
sizeof(struct dccp_hdr_ext) +
dccp_packet_hdr_len(dcb->dccpd_type);
int err, set_ack = 1;
u64 ackno = dp->dccps_gsr;
/* NOTE(review): GSS is advanced even if option insertion fails below. */
dccp_inc_seqno(&dp->dccps_gss);
switch (dcb->dccpd_type) {
case DCCP_PKT_DATA:
set_ack = 0;
/* fall through */
case DCCP_PKT_DATAACK:
break;
case DCCP_PKT_REQUEST:
set_ack = 0;
/* fall through */
case DCCP_PKT_SYNC:
case DCCP_PKT_SYNCACK:
/* For these types the caller supplied the number to acknowledge in dccpd_seq. */
ackno = dcb->dccpd_seq;
/* fall through */
default:
/*
* Only data packets should come through with skb->sk
* set.
*/
WARN_ON(skb->sk);
skb_set_owner_w(skb, sk);
break;
}
dcb->dccpd_seq = dp->dccps_gss;
if (dccp_insert_options(sk, skb)) {
kfree_skb(skb);
return -EPROTO;
}
/* Build DCCP header and checksum it. */
dh = dccp_zeroed_hdr(skb, dccp_header_size);
dh->dccph_type = dcb->dccpd_type;
dh->dccph_sport = inet->sport;
dh->dccph_dport = inet->dport;
/* Data offset is expressed in 32-bit words and includes the options. */
dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4;
dh->dccph_ccval = dcb->dccpd_ccval;
dh->dccph_cscov = dp->dccps_pcslen;
/* XXX For now we're using only 48 bits sequence numbers */
dh->dccph_x = 1;
dp->dccps_awh = dp->dccps_gss;
dccp_hdr_set_seq(dh, dp->dccps_gss);
if (set_ack)
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);
/* Fill in the type-specific header fields. */
switch (dcb->dccpd_type) {
case DCCP_PKT_REQUEST:
dccp_hdr_request(skb)->dccph_req_service =
dp->dccps_service;
break;
case DCCP_PKT_RESET:
dccp_hdr_reset(skb)->dccph_reset_code =
dcb->dccpd_reset_code;
break;
}
icsk->icsk_af_ops->send_check(sk, 0, skb);
if (set_ack)
dccp_event_ack_sent(sk);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
err = icsk->icsk_af_ops->queue_xmit(skb, 0);
return net_xmit_eval(err);
}
return -ENOBUFS;
}
/*
 * dccp_sync_mss - recompute and cache the maximum segment size
 * @sk:   socket to update
 * @pmtu: path MTU to derive the MSS from
 *
 * The MSS is @pmtu minus the network header, the generic and extended DCCP
 * headers, any extension headers and a fixed allowance for options. @pmtu is
 * stored in icsk_pmtu_cookie and the result in dccps_mss_cache.
 *
 * Returns the new MSS.
 */
unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	/*
	 * FIXME: this should come from the CCID infrastructure, where, say,
	 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
	 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
	 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
	 * make it a multiple of 4
	 */
	const int opt_allowance = ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
	int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len -
		       sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext));

	/* Now subtract optional transport overhead */
	mss_now -= icsk->icsk_ext_hdr_len;
	mss_now -= opt_allowance;

	/* And store cached results */
	dp->dccps_mss_cache = mss_now;
	icsk->icsk_pmtu_cookie = pmtu;

	return mss_now;
}
EXPORT_SYMBOL_GPL(dccp_sync_mss);
/*
 * Write-space callback: wake tasks sleeping on sk->sk_sleep and, if the
 * socket is writeable, deliver an asynchronous POLL_OUT notification.
 * Runs with sk_callback_lock held for reading.
 */
void dccp_write_space(struct sock *sk)
{
read_lock(&sk->sk_callback_lock);
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
wake_up_interruptible(sk->sk_sleep);
/* Should agree with poll, otherwise some programs break */
if (sock_writeable(sk))
sk_wake_async(sk, 2, POLL_OUT);
read_unlock(&sk->sk_callback_lock);
}
/**
* dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
* @sk: socket to wait for
* @skb: packet passed to the CCID's send-permission check
*
* Repeatedly asks the TX CCID whether @skb may be sent. A positive answer is
* treated as a delay in milliseconds: the socket lock is released, the task
* sleeps for that long and the lock is re-acquired before asking again.
*
* Returns the CCID's answer once it is <= 0, -EPIPE if the socket has a
* pending error, or -EINTR if a signal is pending.
*/
static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
DEFINE_WAIT(wait);
unsigned long delay;
int rc;
while (1) {
prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
if (sk->sk_err)
goto do_error;
if (signal_pending(current))
goto do_interrupted;
rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
if (rc <= 0)
break;
delay = msecs_to_jiffies(rc);
/* Drop the socket lock while sleeping; it is re-taken before the next check. */
sk->sk_write_pending++;
release_sock(sk);
schedule_timeout(delay);
lock_sock(sk);
sk->sk_write_pending--;
}
out:
finish_wait(sk->sk_sleep, &wait);
return rc;
do_error:
rc = -EPIPE;
goto out;
do_interrupted:
rc = -EINTR;
goto out;
}
/*
 * dccp_write_xmit - send queued packets as far as the TX CCID permits
 * @sk:    socket whose sk_write_queue is drained
 * @block: non-zero if the caller may sleep waiting for the CCID
 *
 * For each packet at the head of the write queue the CCID is asked for
 * permission. A positive answer is a delay in milliseconds: in non-blocking
 * mode the xmit timer is armed for that delay and the loop stops with the
 * packet still queued; in blocking mode dccp_wait_for_ccid() is used.
 *
 * A packet that gets past that point is dequeued. It is sent as DATAACK or
 * DATA when the final answer is 0, and freed otherwise.
 * NOTE(review): that includes the -EINTR case, so an interrupted wait
 * discards the packet.
 */
void dccp_write_xmit(struct sock *sk, int block)
{
struct dccp_sock *dp = dccp_sk(sk);
struct sk_buff *skb;
while ((skb = skb_peek(&sk->sk_write_queue))) {
int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
if (err > 0) {
if (!block) {
sk_reset_timer(sk, &dp->dccps_xmit_timer,
msecs_to_jiffies(err)+jiffies);
break;
} else
err = dccp_wait_for_ccid(sk, skb);
if (err && err != -EINTR)
DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
}
skb_dequeue(&sk->sk_write_queue);
if (err == 0) {
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
/* Remember the length now; skb must not be touched after transmission. */
const int len = skb->len;
if (sk->sk_state == DCCP_PARTOPEN) {
/* See 8.1.5. Handshake Completion */
inet_csk_schedule_ack(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
inet_csk(sk)->icsk_rto,
DCCP_RTO_MAX);
dcb->dccpd_type = DCCP_PKT_DATAACK;
} else if (dccp_ack_pending(sk))
dcb->dccpd_type = DCCP_PKT_DATAACK;
else
dcb->dccpd_type = DCCP_PKT_DATA;
err = dccp_transmit_skb(sk, skb);
/* The CCID is told about the send regardless of the transmit result. */
ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
if (err)
DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
err);
} else {
dccp_pr_debug("packet discarded\n");
kfree_skb(skb);
}
}
}
/*
 * dccp_retransmit_skb - transmit a fresh copy of a retained packet
 * @sk:  socket the packet belongs to
 * @skb: retained original, which is left untouched
 *
 * The header/route is rebuilt first. A private copy is made with
 * pskb_copy() when @skb is already cloned, and with skb_clone() otherwise.
 * A failed copy reaches dccp_transmit_skb() as NULL.
 *
 * Returns -EHOSTUNREACH if rebuilding the header fails, otherwise the result
 * of dccp_transmit_skb().
 */
int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff *copy;

	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0)
		return -EHOSTUNREACH; /* Routing failure or similar. */

	if (skb_cloned(skb))
		copy = pskb_copy(skb, GFP_ATOMIC);
	else
		copy = skb_clone(skb, GFP_ATOMIC);

	return dccp_transmit_skb(sk, copy);
}
/*
 * dccp_make_response - build a Response packet for a connection request
 * @sk:  listening socket; supplies the source port and accounts for the skb
 * @dst: route to attach to the packet (a reference is taken)
 * @req: request sock carrying ISS, ISR, remote port and service code
 *
 * If a Response was already sent for @req (tracked via the `acked' flag),
 * the ISS is incremented before building the packet.
 *
 * Returns the finished, checksummed skb, or NULL if allocation or option
 * insertion fails.
 */
struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
struct request_sock *req)
{
struct dccp_hdr *dh;
struct dccp_request_sock *dreq;
const u32 dccp_header_size = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_response);
struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1,
GFP_ATOMIC);
if (skb == NULL)
return NULL;
/* Reserve space for headers. */
skb_reserve(skb, sk->sk_prot->max_header);
skb->dst = dst_clone(dst);
dreq = dccp_rsk(req);
if (inet_rsk(req)->acked) /* increase ISS upon retransmission */
dccp_inc_seqno(&dreq->dreq_iss);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
if (dccp_insert_options(sk, skb)) {
kfree_skb(skb);
return NULL;
}
/* Build and checksum header */
dh = dccp_zeroed_hdr(skb, dccp_header_size);
dh->dccph_sport = inet_sk(sk)->sport;
dh->dccph_dport = inet_rsk(req)->rmt_port;
/* Data offset is in 32-bit words and covers header plus options. */
dh->dccph_doff = (dccp_header_size +
DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
dh->dccph_type = DCCP_PKT_RESPONSE;
dh->dccph_x = 1;
dccp_hdr_set_seq(dh, dreq->dreq_iss);
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr);
dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;
dccp_csum_outgoing(skb);
/* We use `acked' to remember that a Response was already sent. */
inet_rsk(req)->acked = 1;
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
return skb;
}
EXPORT_SYMBOL_GPL(dccp_make_response);
/*
 * Build a Reset packet for sk with the given reset code.
 *
 * The socket's GSS is advanced and used as the sequence number; GSR is used
 * as the acknowledgement number. dst is attached to the packet with an
 * extra reference.
 *
 * Returns the finished skb, or NULL if allocation or option insertion
 * fails. NOTE(review): GSS has already been advanced when option insertion
 * fails.
 */
static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
const enum dccp_reset_codes code)
{
struct dccp_hdr *dh;
struct dccp_sock *dp = dccp_sk(sk);
const u32 dccp_header_size = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_reset);
struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1,
GFP_ATOMIC);
if (skb == NULL)
return NULL;
/* Reserve space for headers. */
skb_reserve(skb, sk->sk_prot->max_header);
skb->dst = dst_clone(dst);
dccp_inc_seqno(&dp->dccps_gss);
DCCP_SKB_CB(skb)->dccpd_reset_code = code;
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss;
if (dccp_insert_options(sk, skb)) {
kfree_skb(skb);
return NULL;
}
dh = dccp_zeroed_hdr(skb, dccp_header_size);
dh->dccph_sport = inet_sk(sk)->sport;
dh->dccph_dport = inet_sk(sk)->dport;
/* Data offset is in 32-bit words and covers header plus options. */
dh->dccph_doff = (dccp_header_size +
DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
dh->dccph_type = DCCP_PKT_RESET;
dh->dccph_x = 1;
dccp_hdr_set_seq(dh, dp->dccps_gss);
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);
dccp_hdr_reset(skb)->dccph_reset_code = code;
inet_csk(sk)->icsk_af_ops->send_check(sk, 0, skb);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
return skb;
}
/*
 * dccp_send_reset - build and transmit a Reset packet
 * @sk:   socket to reset
 * @code: reset code to place in the packet
 *
 * Returns 0 on success, the error from rebuilding the header/route,
 * -ENOBUFS when no Reset packet could be built, or the net_xmit_eval()
 * result of handing the packet to the network layer.
 */
int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
{
	struct sk_buff *skb;
	/*
	 * FIXME: what if rebuild_header fails?
	 * Should we be doing a rebuild_header here?
	 *
	 * Use the address-family specific routine, as dccp_retransmit_skb()
	 * does, rather than assuming IPv4.
	 */
	int err = inet_csk(sk)->icsk_af_ops->rebuild_header(sk);

	if (err != 0)
		return err;

	skb = dccp_make_reset(sk, sk->sk_dst_cache, code);
	if (skb == NULL)
		/* Nothing was sent: do not report success to the caller. */
		return -ENOBUFS;

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0);
	return net_xmit_eval(err);
}
/*
 * Do all connect socket setups that can be done AF independent.
 *
 * Clears the socket error and SOCK_DONE flag, derives the MSS from the
 * cached route's MTU, seeds GSS/AWL/GAR from the initial sequence number and
 * resets the retransmission counter.
 */
static inline void dccp_connect_init(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct dst_entry *dst = __sk_dst_get(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
sk->sk_err = 0;
sock_reset_flag(sk, SOCK_DONE);
/* NOTE(review): dst is used unchecked -- presumably the caller has already set up the route; confirm. */
dccp_sync_mss(sk, dst_mtu(dst));
/*
* SWL and AWL are initially adjusted so that they are not less than
* the initial Sequence Numbers received and sent, respectively:
* SWL := max(GSR + 1 - floor(W/4), ISR),
* AWL := max(GSS - W' + 1, ISS).
* These adjustments MUST be applied only at the beginning of the
* connection.
*/
dccp_update_gss(sk, dp->dccps_iss);
dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
/* S.GAR - greatest valid acknowledgement number received on a non-Sync;
* initialized to S.ISS (sec. 8.5) */
dp->dccps_gar = dp->dccps_iss;
icsk->icsk_retransmits = 0;
}
/*
 * dccp_connect - start an active open by sending a Request
 * @sk: connecting socket
 *
 * The Request skb is kept in sk_send_head for retransmission; a clone of it
 * is what actually gets transmitted. The retransmit timer is armed so the
 * Request is repeated until an answer arrives.
 *
 * Returns 0, or -ENOBUFS if the Request skb cannot be allocated.
 * NOTE(review): the result of the first transmission is not checked; a
 * failed clone or send is only recovered by the retransmit timer.
 */
int dccp_connect(struct sock *sk)
{
struct sk_buff *skb;
struct inet_connection_sock *icsk = inet_csk(sk);
dccp_connect_init(sk);
skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
if (unlikely(skb == NULL))
return -ENOBUFS;
/* Reserve space for headers. */
skb_reserve(skb, sk->sk_prot->max_header);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
dccp_skb_entail(sk, skb);
dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
/* Timer for repeating the REQUEST until an answer. */
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
icsk->icsk_rto, DCCP_RTO_MAX);
return 0;
}
EXPORT_SYMBOL_GPL(dccp_connect);
/*
 * dccp_send_ack - send a pure Ack packet
 * @sk: socket to acknowledge on
 *
 * Does nothing on a closed socket. If no skb can be allocated, an ACK is
 * scheduled and the delayed-ACK timer is armed so the attempt is repeated
 * later.
 */
void dccp_send_ack(struct sock *sk)
{
	struct sk_buff *skb;

	/* If we have been reset, we may not send again. */
	if (sk->sk_state == DCCP_CLOSED)
		return;

	skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC);
	if (skb == NULL) {
		/* Out of memory: fall back to the delayed-ACK machinery. */
		inet_csk_schedule_ack(sk);
		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
					  TCP_DELACK_MAX,
					  DCCP_RTO_MAX);
		return;
	}

	/* Reserve space for headers */
	skb_reserve(skb, sk->sk_prot->max_header);
	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
	dccp_transmit_skb(sk, skb);
}
EXPORT_SYMBOL_GPL(dccp_send_ack);
/*
 * dccp_send_delayed_ack - schedule an Ack via the delayed-ACK timer
 * @sk: socket to acknowledge on
 *
 * The default delay is 2 seconds from now. If a delayed-ACK timer is
 * already pending, the earlier of the two expiry times is kept. If that
 * pending timer was marked as blocked, the Ack is sent immediately instead.
 */
void dccp_send_delayed_ack(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
/*
* FIXME: tune this timer. elapsed time fixes the skew, so no problem
* with using 2s, and active senders also piggyback the ACK into a
* DATAACK packet, so this is really for quiescent senders.
*/
unsigned long timeout = jiffies + 2 * HZ;
/* Use the new timeout only if there wasn't an older, earlier one. */
if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
/* If delack timer was blocked or is about to expire,
* send ACK now.
*
* FIXME: check the "about to expire" part
*/
if (icsk->icsk_ack.blocked) {
dccp_send_ack(sk);
return;
}
/* Keep the already-scheduled expiry unless the new one is strictly earlier. */
if (!time_before(timeout, icsk->icsk_ack.timeout))
timeout = icsk->icsk_ack.timeout;
}
icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
icsk->icsk_ack.timeout = timeout;
sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}
/*
 * dccp_send_sync - send a Sync or SyncAck packet
 * @sk:       socket to send on
 * @seq:      sequence number stored in the packet's control block; for
 *            SYNC/SYNCACK, dccp_transmit_skb() uses it as the
 *            acknowledgement number
 * @pkt_type: packet type to send
 *
 * The packet is not placed on the write queue, so dccp_transmit_skb() sets
 * its ownership to this sock. An allocation failure is silently ignored.
 */
void dccp_send_sync(struct sock *sk, const u64 seq,
		    const enum dccp_pkt_type pkt_type)
{
	const int hdr_room = sk->sk_prot->max_header;
	struct sk_buff *skb = alloc_skb(hdr_room, GFP_ATOMIC);

	/* FIXME: how to make sure the sync is sent? */
	if (skb == NULL)
		return;

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, hdr_room);
	DCCP_SKB_CB(skb)->dccpd_seq = seq;
	DCCP_SKB_CB(skb)->dccpd_type = pkt_type;

	dccp_transmit_skb(sk, skb);
}
EXPORT_SYMBOL_GPL(dccp_send_sync);
/*
* Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This
* cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under
* any circumstances.
*
* A client sends Close, any other role sends CloseReq. For an active close
* the write queue is flushed first (blocking), the packet is kept in
* sk_send_head and a clone of it is transmitted; otherwise the packet is
* transmitted directly.
*
* NOTE(review): despite the statement above, an skb allocation failure
* currently makes this function return without sending anything.
*/
void dccp_send_close(struct sock *sk, const int active)
{
struct dccp_sock *dp = dccp_sk(sk);
struct sk_buff *skb;
/* An active close may sleep for memory; otherwise allocate atomically. */
const gfp_t prio = active ? GFP_KERNEL : GFP_ATOMIC;
skb = alloc_skb(sk->sk_prot->max_header, prio);
if (skb == NULL)
return;
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, sk->sk_prot->max_header);
DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ?
DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
if (active) {
dccp_write_xmit(sk, 1);
dccp_skb_entail(sk, skb);
dccp_transmit_skb(sk, skb_clone(skb, prio));
/* FIXME do we need a retransmit timer here? */
} else
dccp_transmit_skb(sk, skb);
}

202
net/dccp/probe.c Normal file
View File

@@ -0,0 +1,202 @@
/*
* dccp_probe - Observe the DCCP flow with kprobes.
*
* The idea for this came from Werner Almesberger's umlsim
* Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
*
* Modified for DCCP from Stephen Hemminger's code
* Copyright (C) 2006, Ian McDonald <ian.mcdonald@jandi.co.nz>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/socket.h>
#include <linux/dccp.h>
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/kfifo.h>
#include <linux/vmalloc.h>
#include "dccp.h"
#include "ccid.h"
#include "ccids/ccid3.h"
/* Port to match against source/destination port; 0 matches every socket. */
static int port;
/* Size in bytes requested for the log fifo. */
static int bufsize = 64 * 1024;
/* Name of the entry created under the net proc directory. */
static const char procname[] = "dccpprobe";

/*
 * Module-wide logging state. Only this file uses it (the struct type is
 * anonymous), so it is kept out of the global kernel namespace.
 */
static struct {
	struct kfifo	  *fifo;	/* formatted log lines awaiting a reader */
	spinlock_t	  lock;		/* lock handed to kfifo_alloc() */
	wait_queue_head_t wait;		/* readers sleep here until data arrives */
	struct timeval	  tstart;	/* reference time for log timestamps */
} dccpw;
/*
 * Format one log line and append it to the probe fifo.
 *
 * The line is prefixed with the time elapsed since dccpw.tstart as
 * "seconds.microseconds ". The whole line is limited to the 256-byte local
 * buffer; vscnprintf() truncates anything longer. Readers waiting on
 * dccpw.wait are woken afterwards.
 */
static void printl(const char *fmt, ...)
{
va_list args;
int len;
struct timeval now;
char tbuf[256];
va_start(args, fmt);
do_gettimeofday(&now);
now.tv_sec -= dccpw.tstart.tv_sec;
now.tv_usec -= dccpw.tstart.tv_usec;
/* Borrow one second if the microsecond difference went negative. */
if (now.tv_usec < 0) {
--now.tv_sec;
now.tv_usec += 1000000;
}
len = sprintf(tbuf, "%lu.%06lu ",
(unsigned long) now.tv_sec,
(unsigned long) now.tv_usec);
len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
va_end(args);
kfifo_put(dccpw.fifo, tbuf, len);
wake_up(&dccpw.wait);
}
/*
 * jprobe handler mirroring the signature of dccp_sendmsg().
 *
 * Logs one line per call for sockets whose source or destination port
 * matches the `port' module parameter (0 matches everything): addresses,
 * ports and message size, plus four CCID3 sender fields when the socket's
 * TX CCID is CCID3.
 *
 * Control goes back to the probed function through jprobe_return(); the
 * trailing return only satisfies the compiler.
 */
static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
			 struct msghdr *msg, size_t size)
{
	const struct dccp_minisock *dmsk = dccp_msk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	const struct ccid3_hc_tx_sock *hctx;

	if (dmsk->dccpms_tx_ccid == DCCPC_CCID3)
		hctx = ccid3_hc_tx_sk(sk);
	else
		hctx = NULL;

	if (port == 0 || ntohs(inet->dport) == port ||
	    ntohs(inet->sport) == port) {
		/* size is a size_t: it must be printed with %zu, not %d. */
		if (hctx)
			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %zu %d %d %d %d\n",
			       NIPQUAD(inet->saddr), ntohs(inet->sport),
			       NIPQUAD(inet->daddr), ntohs(inet->dport), size,
			       hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
			       hctx->ccid3hctx_p, hctx->ccid3hctx_t_ipi);
		else
			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %zu\n",
			       NIPQUAD(inet->saddr), ntohs(inet->sport),
			       NIPQUAD(inet->daddr), ntohs(inet->dport), size);
	}

	jprobe_return();
	return 0;
}
/* jprobe hooking dccp_sendmsg(); jdccp_sendmsg() is its entry handler. */
static struct jprobe dccp_send_probe = {
.kp = {
.symbol_name = "dccp_sendmsg",
},
.entry = JPROBE_ENTRY(jdccp_sendmsg),
};
/*
 * open() handler for the proc entry: discard whatever is in the fifo and
 * restart the reference time used for log timestamps. Always returns 0.
 */
static int dccpprobe_open(struct inode *inode, struct file *file)
{
kfifo_reset(dccpw.fifo);
do_gettimeofday(&dccpw.tstart);
return 0;
}
/*
 * read() handler for the proc entry.
 *
 * Blocks (interruptibly) until the fifo holds data, then copies up to @len
 * bytes to user space through a temporary kernel buffer.
 *
 * Returns the number of bytes read, 0 for a zero-length request, -EINVAL
 * for a NULL buffer, -ENOMEM if the temporary buffer cannot be allocated,
 * the error from the interrupted wait, or -EFAULT if the copy to user space
 * fails.
 */
static ssize_t dccpprobe_read(struct file *file, char __user *buf,
			      size_t len, loff_t *ppos)
{
	int error = 0, cnt = 0;
	unsigned char *tbuf;

	/* len is unsigned, so only the pointer needs validating. */
	if (!buf)
		return -EINVAL;

	if (len == 0)
		return 0;

	tbuf = vmalloc(len);
	if (!tbuf)
		return -ENOMEM;

	error = wait_event_interruptible(dccpw.wait,
					 __kfifo_len(dccpw.fifo) != 0);
	if (error)
		goto out_free;

	cnt = kfifo_get(dccpw.fifo, tbuf, len);
	/*
	 * copy_to_user() returns the number of bytes it could NOT copy;
	 * turn any shortfall into -EFAULT instead of leaking that positive
	 * count to the caller as a bogus read length.
	 */
	if (copy_to_user(buf, tbuf, cnt))
		error = -EFAULT;

out_free:
	vfree(tbuf);

	return error ? error : cnt;
}
/* File operations of the proc entry: only open and read are provided. */
static const struct file_operations dccpprobe_fops = {
.owner = THIS_MODULE,
.open = dccpprobe_open,
.read = dccpprobe_read,
};
/*
 * Module init: set up the wait queue and lock, allocate the log fifo,
 * create the proc entry and register the jprobe.
 *
 * Returns 0 on success. On failure, everything set up so far is undone and
 * the error is returned: the kfifo_alloc() error, -ENOMEM if the proc entry
 * cannot be created, or the register_jprobe() error.
 */
static __init int dccpprobe_init(void)
{
int ret = -ENOMEM;
init_waitqueue_head(&dccpw.wait);
spin_lock_init(&dccpw.lock);
dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock);
if (IS_ERR(dccpw.fifo))
return PTR_ERR(dccpw.fifo);
/* ret is still -ENOMEM here, which is what a failed proc creation reports. */
if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops))
goto err0;
ret = register_jprobe(&dccp_send_probe);
if (ret)
goto err1;
pr_info("DCCP watch registered (port=%d)\n", port);
return 0;
err1:
proc_net_remove(procname);
err0:
kfifo_free(dccpw.fifo);
return ret;
}
module_init(dccpprobe_init);
/*
 * Module exit: tear down in the reverse order of dccpprobe_init().
 *
 * The jprobe is unregistered first so that jdccp_sendmsg() can no longer
 * run and write into the fifo. The proc entry is removed next, and only
 * then is the fifo freed.
 */
static __exit void dccpprobe_exit(void)
{
	unregister_jprobe(&dccp_send_probe);
	proc_net_remove(procname);
	kfifo_free(dccpw.fifo);
}
module_exit(dccpprobe_exit);
/* Module parameters; permission 0 means they are not exposed through sysfs. */
MODULE_PARM_DESC(port, "Port to match (0=all)");
module_param(port, int, 0);
MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
module_param(bufsize, int, 0);
/* Module metadata. */
MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>");
MODULE_DESCRIPTION("DCCP snooper");
MODULE_LICENSE("GPL");

1117
net/dccp/proto.c Normal file

File diff suppressed because it is too large Load Diff

141
net/dccp/sysctl.c Normal file
View File

@@ -0,0 +1,141 @@
/*
* net/dccp/sysctl.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@mandriva.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License v2
* as published by the Free Software Foundation.
*/
#include <linux/mm.h>
#include <linux/sysctl.h>
#include "dccp.h"
#include "feat.h"
#ifndef CONFIG_SYSCTL
#error This file should not be compiled without CONFIG_SYSCTL defined
#endif
/*
 * Leaf sysctl entries, registered below the "default" directory declared in
 * dccp_table. Every entry exposes one variable, mode 0644, handled by
 * proc_dointvec. The table ends with an all-zero entry.
 */
static struct ctl_table dccp_default_table[] = {
{
.procname = "seq_window",
.data = &sysctl_dccp_feat_sequence_window,
.maxlen = sizeof(sysctl_dccp_feat_sequence_window),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "rx_ccid",
.data = &sysctl_dccp_feat_rx_ccid,
.maxlen = sizeof(sysctl_dccp_feat_rx_ccid),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tx_ccid",
.data = &sysctl_dccp_feat_tx_ccid,
.maxlen = sizeof(sysctl_dccp_feat_tx_ccid),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "ack_ratio",
.data = &sysctl_dccp_feat_ack_ratio,
.maxlen = sizeof(sysctl_dccp_feat_ack_ratio),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "send_ackvec",
.data = &sysctl_dccp_feat_send_ack_vector,
.maxlen = sizeof(sysctl_dccp_feat_send_ack_vector),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "send_ndp",
.data = &sysctl_dccp_feat_send_ndp_count,
.maxlen = sizeof(sysctl_dccp_feat_send_ndp_count),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "request_retries",
.data = &sysctl_dccp_request_retries,
.maxlen = sizeof(sysctl_dccp_request_retries),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "retries1",
.data = &sysctl_dccp_retries1,
.maxlen = sizeof(sysctl_dccp_retries1),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "retries2",
.data = &sysctl_dccp_retries2,
.maxlen = sizeof(sysctl_dccp_retries2),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "tx_qlen",
.data = &sysctl_dccp_tx_qlen,
.maxlen = sizeof(sysctl_dccp_tx_qlen),
.mode = 0644,
.proc_handler = proc_dointvec,
},
/* terminator */
{ .ctl_name = 0, }
};
/* The "default" directory (mode 0555) holding the entries of dccp_default_table. */
static struct ctl_table dccp_table[] = {
{
.ctl_name = NET_DCCP_DEFAULT,
.procname = "default",
.mode = 0555,
.child = dccp_default_table,
},
{ .ctl_name = 0, },
};
/* The "dccp" directory (mode 0555) whose child is dccp_table. */
static struct ctl_table dccp_dir_table[] = {
{
.ctl_name = NET_DCCP,
.procname = "dccp",
.mode = 0555,
.child = dccp_table,
},
{ .ctl_name = 0, },
};
/*
 * Root of the hierarchy handed to register_sysctl_table(): the "net"
 * directory (mode 0555) containing dccp_dir_table, so the leaves end up
 * under net/dccp/default/.
 */
static struct ctl_table dccp_root_table[] = {
{
.ctl_name = CTL_NET,
.procname = "net",
.mode = 0555,
.child = dccp_dir_table,
},
{ .ctl_name = 0, },
};
/* Handle returned by register_sysctl_table(); NULL while nothing is registered. */
static struct ctl_table_header *dccp_table_header;

/*
 * Register the DCCP sysctl hierarchy.
 * Returns 0 on success or -ENOMEM if registration fails.
 */
int __init dccp_sysctl_init(void)
{
	dccp_table_header = register_sysctl_table(dccp_root_table);
	if (dccp_table_header == NULL)
		return -ENOMEM;

	return 0;
}
/*
 * Unregister the DCCP sysctl hierarchy if it is registered. Clearing the
 * handle afterwards makes a repeated call a no-op.
 */
void dccp_sysctl_exit(void)
{
	if (dccp_table_header == NULL)
		return;

	unregister_sysctl_table(dccp_table_header);
	dccp_table_header = NULL;
}

293
net/dccp/timer.c Normal file
View File

@@ -0,0 +1,293 @@
/*
* net/dccp/timer.c
*
* An implementation of the DCCP protocol
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/dccp.h>
#include <linux/skbuff.h>
#include "dccp.h"
/*
 * sysctl variables governing numbers of retransmission attempts.
 * All three default to the corresponding TCP constants.
 */
/* Retry limit in REQUESTING/PARTOPEN when the socket sets no icsk_syn_retries. */
int sysctl_dccp_request_retries __read_mostly = TCP_SYN_RETRIES;
/* Retransmission count from which the cached route is given negative advice. */
int sysctl_dccp_retries1 __read_mostly = TCP_RETR1;
/* Retry limit in all other states, after which the connection is aborted. */
int sysctl_dccp_retries2 __read_mostly = TCP_RETR2;
/*
 * Abort the connection after a write timeout: report the pending soft error
 * (or ETIMEDOUT if there is none) on the socket, send a Reset with code
 * "aborted", finish the socket and count the event.
 */
static void dccp_write_err(struct sock *sk)
{
/* GNU "?:" shorthand: keep sk_err_soft when it is non-zero. */
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
sk->sk_error_report(sk);
dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
dccp_done(sk);
DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT);
}
/*
 * A write timeout has occurred. Process the after effects.
 *
 * Picks the retry limit for the current state, gives the cached route
 * negative advice where appropriate, and aborts the connection once the
 * number of retransmissions has reached the limit.
 *
 * Returns 1 if the connection was aborted, 0 if retransmission may go on.
 */
static int dccp_write_timeout(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
int retry_until;
if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
if (icsk->icsk_retransmits != 0)
dst_negative_advice(&sk->sk_dst_cache);
/* A per-socket icsk_syn_retries, when non-zero, overrides the sysctl. */
retry_until = icsk->icsk_syn_retries ?
: sysctl_dccp_request_retries;
} else {
if (icsk->icsk_retransmits >= sysctl_dccp_retries1) {
/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu
black hole detection. :-(
This is the place to do it. It is not done. I do not want
to do it. It is disgusting. It does not work in any
case. Let me cite the same draft, which requires
us to implement this:
"The one security concern raised by this memo is that ICMP black holes
are often caused by over-zealous security administrators who block
all ICMP messages. It is vitally important that those who design and
deploy security systems understand the impact of strict filtering on
upper-layer protocols. The safest web site in the world is worthless
if most TCP implementations cannot transfer data from it. It would
be far nicer to have all of the black holes fixed rather than fixing
all of the TCP implementations."
Golden words :-).
*/
dst_negative_advice(&sk->sk_dst_cache);
}
retry_until = sysctl_dccp_retries2;
/*
* FIXME: see tcp_write_timeout and tcp_out_of_resources
*/
}
if (icsk->icsk_retransmits >= retry_until) {
/* Has it gone just too far? */
dccp_write_err(sk);
return 1;
}
return 0;
}
/*
* The DCCP retransmit timer.
*
* With nothing in sk_send_head the timer serves feature negotiation: an Ack
* is sent (in OPEN state only) and the timer backs off. Otherwise the packet
* in sk_send_head is retransmitted, unless the retry limit has been reached
* and the connection was aborted. After a successful retransmission the RTO
* is doubled (capped at DCCP_RTO_MAX) and the timer is re-armed.
*/
static void dccp_retransmit_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
/* retransmit timer is used for feature negotiation throughout
* connection. In this case, no packet is re-transmitted, but rather an
* ack is generated and pending changes are placed into its options.
*/
if (sk->sk_send_head == NULL) {
dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk);
if (sk->sk_state == DCCP_OPEN)
dccp_send_ack(sk);
goto backoff;
}
/*
* sk->sk_send_head has to have one skb with
* DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
* packet types. The only packets eligible for retransmission are:
* -- Requests in client-REQUEST state (sec. 8.1.1)
* -- Acks in client-PARTOPEN state (sec. 8.1.5)
* -- CloseReq in server-CLOSEREQ state (sec. 8.3)
* -- Close in node-CLOSING state (sec. 8.3) */
BUG_TRAP(sk->sk_send_head != NULL);
/*
* More than 4MSL (8 minutes) has passed, a RESET(aborted) was
* sent, no need to retransmit, this sock is dead.
*/
if (dccp_write_timeout(sk))
goto out;
/*
* We want to know the number of packets retransmitted, not the
* total number of retransmissions of clones of original packets.
*/
if (icsk->icsk_retransmits == 0)
DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS);
if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) {
/*
* Retransmission failed because of local congestion,
* do not backoff.
*/
if (icsk->icsk_retransmits == 0)
icsk->icsk_retransmits = 1;
/* Retry soon, without doubling the RTO. */
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
min(icsk->icsk_rto,
TCP_RESOURCE_PROBE_INTERVAL),
DCCP_RTO_MAX);
goto out;
}
backoff:
icsk->icsk_backoff++;
icsk->icsk_retransmits++;
/* Exponential backoff, capped at DCCP_RTO_MAX. */
icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto,
DCCP_RTO_MAX);
/* After too many retransmissions, drop the cached route. */
if (icsk->icsk_retransmits > sysctl_dccp_retries1)
__sk_dst_reset(sk);
out:;
}
/*
 * Timer callback for the retransmit timer.
 *
 * @data is the socket pointer cast to unsigned long.  The pending event is
 * only processed when the socket is not owned by a user context, is not
 * closed, has an event pending, and the recorded timeout has really elapsed;
 * otherwise the timer is re-armed or the callback simply returns.
 */
static void dccp_write_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct inet_connection_sock *icsk = inet_csk(sk);
	int pending;

	bh_lock_sock(sk);

	if (sock_owned_by_user(sk)) {
		/* Socket is busy: try again in HZ / 20 jiffies. */
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
			       jiffies + (HZ / 20));
	} else if (sk->sk_state != DCCP_CLOSED && icsk->icsk_pending) {
		if (time_after(icsk->icsk_timeout, jiffies)) {
			/* Fired early: re-arm for the recorded deadline. */
			sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
				       icsk->icsk_timeout);
		} else {
			/* Consume the pending event before dispatching it. */
			pending = icsk->icsk_pending;
			icsk->icsk_pending = 0;

			if (pending == ICSK_TIME_RETRANS)
				dccp_retransmit_timer(sk);
		}
	}

	bh_unlock_sock(sk);
	sock_put(sk);
}
/*
 * Timer for listening sockets: hands the socket's request queue to
 * inet_csk_reqsk_queue_prune() with the DCCP timeout parameters.
 */
static void dccp_response_timer(struct sock *sk)
{
	inet_csk_reqsk_queue_prune(sk,
				   TCP_SYNQ_INTERVAL,
				   DCCP_TIMEOUT_INIT,
				   DCCP_RTO_MAX);
}
/*
 * Timer callback for the keepalive timer.
 *
 * @data is the socket pointer cast to unsigned long.  Work is only done when
 * the socket is not in use by a user context; a busy socket gets the timer
 * re-armed HZ / 20 jiffies later.  The only state acted upon is DCCP_LISTEN,
 * which is forwarded to dccp_response_timer().
 */
static void dccp_keepalive_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;

	bh_lock_sock(sk);

	if (sock_owned_by_user(sk))
		inet_csk_reset_keepalive_timer(sk, HZ / 20);	/* retry later */
	else if (sk->sk_state == DCCP_LISTEN)
		dccp_response_timer(sk);

	bh_unlock_sock(sk);
	sock_put(sk);
}
/*
 * Delayed-ACK timer callback.
 *
 * This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff.
 * @data is the socket pointer cast to unsigned long.
 */
static void dccp_delack_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct inet_connection_sock *icsk = inet_csk(sk);

	bh_lock_sock(sk);

	if (sock_owned_by_user(sk)) {
		/* Socket is busy: note the blocked ACK and retry later. */
		icsk->icsk_ack.blocked = 1;
		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
		sk_reset_timer(sk, &icsk->icsk_delack_timer,
			       jiffies + TCP_DELACK_MIN);
		goto unlock;
	}

	/* Nothing to do on a closed socket or without a timed ACK pending. */
	if (sk->sk_state == DCCP_CLOSED)
		goto unlock;
	if (!(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
		goto unlock;

	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
		/* Fired early: re-arm for the recorded deadline. */
		sk_reset_timer(sk, &icsk->icsk_delack_timer,
			       icsk->icsk_ack.timeout);
		goto unlock;
	}

	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;

	if (!inet_csk_ack_scheduled(sk))
		goto unlock;

	if (icsk->icsk_ack.pingpong) {
		/* Delayed ACK missed: leave pingpong mode and deflate ATO. */
		icsk->icsk_ack.pingpong = 0;
		icsk->icsk_ack.ato = TCP_ATO_MIN;
	} else {
		/* Delayed ACK missed: inflate ATO, bounded by the RTO. */
		icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
					 icsk->icsk_rto);
	}

	dccp_send_ack(sk);
	NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);

unlock:
	bh_unlock_sock(sk);
	sock_put(sk);
}
/* Transmit-delay timer: used by the CCIDs to delay actual send time */
static void dccp_write_xmit_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
struct dccp_sock *dp = dccp_sk(sk);
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1);
else
dccp_write_xmit(sk, 0);
bh_unlock_sock(sk);
sock_put(sk);
}
/*
 * Initialise the per-socket transmit-delay timer so that it invokes
 * dccp_write_xmit_timer() with @sk as its argument.  The timer is only set
 * up here, not started.
 */
static void dccp_init_write_xmit_timer(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	init_timer(&dp->dccps_xmit_timer);

	dp->dccps_xmit_timer.function = dccp_write_xmit_timer;
	dp->dccps_xmit_timer.data     = (unsigned long)sk;
}
/*
 * Set up all DCCP timers of @sk: the transmit-delay timer plus the
 * retransmit, delayed-ACK and keepalive timers, which are registered through
 * inet_csk_init_xmit_timers() with the handlers defined in this file.
 */
void dccp_init_xmit_timers(struct sock *sk)
{
	dccp_init_write_xmit_timer(sk);

	inet_csk_init_xmit_timers(sk,
				  dccp_write_timer,
				  dccp_delack_timer,
				  dccp_keepalive_timer);
}