From df71837d5024e2524cd51c93621e558aa7dd9f3f Mon Sep 17 00:00:00 2001 From: Trent Jaeger Date: Tue, 13 Dec 2005 23:12:27 -0800 Subject: [LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket and the same security context is required for resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using the ipsec-tools. ipsec-tools have been modified (a separate ipsec-tools patch is available for version 0.5) that supports assignment of xfrm_policy entries and security associations with security contexts via setkey and the negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and contain programs for setting, getting, and deleting policy for testing this interface. Testing of sa functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/pfkeyv2.h | 13 ++++- include/linux/security.h | 132 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/xfrm.h | 29 +++++++++++ include/net/flow.h | 7 +-- include/net/xfrm.h | 27 +++++++++- 5 files changed, 202 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index 724066778aff..6351c4055ace 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -216,6 +216,16 @@ struct sadb_x_nat_t_port { } __attribute__((packed)); /* sizeof(struct sadb_x_nat_t_port) == 8 */ +/* Generic LSM security context */ +struct sadb_x_sec_ctx { + uint16_t sadb_x_sec_len; + uint16_t sadb_x_sec_exttype; + uint8_t sadb_x_ctx_alg; /* LSMs: e.g., selinux == 1 */ + uint8_t sadb_x_ctx_doi; + uint16_t sadb_x_ctx_len; +} __attribute__((packed)); +/* sizeof(struct sadb_sec_ctx) = 8 */ + /* Message types */ #define SADB_RESERVED 0 #define SADB_GETSPI 1 @@ -325,7 +335,8 @@ struct sadb_x_nat_t_port { #define SADB_X_EXT_NAT_T_SPORT 21 #define SADB_X_EXT_NAT_T_DPORT 22 #define SADB_X_EXT_NAT_T_OA 23 -#define SADB_EXT_MAX 23 +#define SADB_X_EXT_SEC_CTX 24 +#define SADB_EXT_MAX 24 /* Identity Extension values */ #define SADB_IDENTTYPE_RESERVED 0 diff --git a/include/linux/security.h b/include/linux/security.h index f7e0ae018712..ef753654daa5 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -59,6 +59,12 @@ struct sk_buff; struct sock; struct sockaddr; struct socket; +struct flowi; +struct dst_entry; +struct xfrm_selector; +struct xfrm_policy; +struct xfrm_state; +struct xfrm_user_sec_ctx; extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); extern int cap_netlink_recv(struct sk_buff *skb); @@ -788,6 +794,52 @@ struct swap_info_struct; * which is used to copy security attributes between local stream sockets. * @sk_free_security: * Deallocate security structure. + * @sk_getsid: + * Retrieve the LSM-specific sid for the sock to enable caching of network + * authorizations. + * + * Security hooks for XFRM operations. + * + * @xfrm_policy_alloc_security: + * @xp contains the xfrm_policy being added to Security Policy Database + * used by the XFRM system. + * @sec_ctx contains the security context information being provided by + * the user-level policy update program (e.g., setkey). + * Allocate a security structure to the xp->selector.security field. + * The security field is initialized to NULL when the xfrm_policy is + * allocated. + * Return 0 if operation was successful (memory to allocate, legal context) + * @xfrm_policy_clone_security: + * @old contains an existing xfrm_policy in the SPD. + * @new contains a new xfrm_policy being cloned from old. + * Allocate a security structure to the new->selector.security field + * that contains the information from the old->selector.security field. + * Return 0 if operation was successful (memory to allocate). + * @xfrm_policy_free_security: + * @xp contains the xfrm_policy + * Deallocate xp->selector.security. + * @xfrm_state_alloc_security: + * @x contains the xfrm_state being added to the Security Association + * Database by the XFRM system. + * @sec_ctx contains the security context information being provided by + * the user-level SA generation program (e.g., setkey or racoon). + * Allocate a security structure to the x->sel.security field. The + * security field is initialized to NULL when the xfrm_state is + * allocated. + * Return 0 if operation was successful (memory to allocate, legal context). + * @xfrm_state_free_security: + * @x contains the xfrm_state. + * Deallocate x>sel.security. + * @xfrm_policy_lookup: + * @xp contains the xfrm_policy for which the access control is being + * checked. + * @sk_sid contains the sock security label that is used to authorize + * access to the policy xp. + * @dir contains the direction of the flow (input or output). + * Check permission when a sock selects a xfrm_policy for processing + * XFRMs on a packet. The hook is called when selecting either a + * per-socket policy or a generic xfrm policy. + * Return 0 if permission is granted. * * Security hooks affecting all Key Management operations * @@ -1237,8 +1289,18 @@ struct security_operations { int (*socket_getpeersec) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len); int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); void (*sk_free_security) (struct sock *sk); + unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir); #endif /* CONFIG_SECURITY_NETWORK */ +#ifdef CONFIG_SECURITY_NETWORK_XFRM + int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); + void (*xfrm_policy_free_security) (struct xfrm_policy *xp); + int (*xfrm_state_alloc_security) (struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx); + void (*xfrm_state_free_security) (struct xfrm_state *x); + int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 sk_sid, u8 dir); +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ + /* key management security hooks */ #ifdef CONFIG_KEYS int (*key_alloc)(struct key *key); @@ -2679,6 +2741,11 @@ static inline void security_sk_free(struct sock *sk) { return security_ops->sk_free_security(sk); } + +static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) +{ + return security_ops->sk_getsid(sk, fl, dir); +} #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket * sock, struct socket * other, @@ -2795,8 +2862,73 @@ static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority) static inline void security_sk_free(struct sock *sk) { } + +static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) +{ + return 0; +} #endif /* CONFIG_SECURITY_NETWORK */ +#ifdef CONFIG_SECURITY_NETWORK_XFRM +static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) +{ + return security_ops->xfrm_policy_alloc_security(xp, sec_ctx); +} + +static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) +{ + return security_ops->xfrm_policy_clone_security(old, new); +} + +static inline void security_xfrm_policy_free(struct xfrm_policy *xp) +{ + security_ops->xfrm_policy_free_security(xp); +} + +static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +{ + return security_ops->xfrm_state_alloc_security(x, sec_ctx); +} + +static inline void security_xfrm_state_free(struct xfrm_state *x) +{ + security_ops->xfrm_state_free_security(x); +} + +static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +{ + return security_ops->xfrm_policy_lookup(xp, sk_sid, dir); +} +#else /* CONFIG_SECURITY_NETWORK_XFRM */ +static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) +{ + return 0; +} + +static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) +{ + return 0; +} + +static inline void security_xfrm_policy_free(struct xfrm_policy *xp) +{ +} + +static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +{ + return 0; +} + +static inline void security_xfrm_state_free(struct xfrm_state *x) +{ +} + +static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +{ + return 0; +} +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ + #ifdef CONFIG_KEYS #ifdef CONFIG_SECURITY static inline int security_key_alloc(struct key *key) diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 0fb077d68441..82fbb758e28f 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -27,6 +27,22 @@ struct xfrm_id __u8 proto; }; +struct xfrm_sec_ctx { + __u8 ctx_doi; + __u8 ctx_alg; + __u16 ctx_len; + __u32 ctx_sid; + char ctx_str[0]; +}; + +/* Security Context Domains of Interpretation */ +#define XFRM_SC_DOI_RESERVED 0 +#define XFRM_SC_DOI_LSM 1 + +/* Security Context Algorithms */ +#define XFRM_SC_ALG_RESERVED 0 +#define XFRM_SC_ALG_SELINUX 1 + /* Selector, used as selector both on policy rules (SPD) and SAs. */ struct xfrm_selector @@ -146,6 +162,18 @@ enum { #define XFRM_NR_MSGTYPES (XFRM_MSG_MAX + 1 - XFRM_MSG_BASE) +/* + * Generic LSM security context for comunicating to user space + * NOTE: Same format as sadb_x_sec_ctx + */ +struct xfrm_user_sec_ctx { + __u16 len; + __u16 exttype; + __u8 ctx_alg; /* LSMs: e.g., selinux == 1 */ + __u8 ctx_doi; + __u16 ctx_len; +}; + struct xfrm_user_tmpl { struct xfrm_id id; __u16 family; @@ -176,6 +204,7 @@ enum xfrm_attr_type_t { XFRMA_TMPL, /* 1 or more struct xfrm_user_tmpl */ XFRMA_SA, XFRMA_POLICY, + XFRMA_SEC_CTX, /* struct xfrm_sec_ctx */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/flow.h b/include/net/flow.h index 9a5c94b1a0ec..ec7eb86eb203 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -84,11 +84,12 @@ struct flowi { #define FLOW_DIR_OUT 1 #define FLOW_DIR_FWD 2 -typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, +struct sock; +typedef void (*flow_resolve_t)(struct flowi *key, u32 sk_sid, u16 family, u8 dir, void **objp, atomic_t **obj_refp); -extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, - flow_resolve_t resolver); +extern void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, + flow_resolve_t resolver); extern void flow_cache_flush(void); extern atomic_t flow_cache_genid; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1cdb87912137..487abca3ca6f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -144,6 +144,9 @@ struct xfrm_state * transformer. */ struct xfrm_type *type; + /* Security context */ + struct xfrm_sec_ctx *security; + /* Private data of this transformer, format is opaque, * interpreted by xfrm_type methods. */ void *data; @@ -298,6 +301,7 @@ struct xfrm_policy __u8 flags; __u8 dead; __u8 xfrm_nr; + struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; }; @@ -510,6 +514,25 @@ xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, return 0; } +#ifdef CONFIG_SECURITY_NETWORK_XFRM +/* If neither has a context --> match + * Otherwise, both must have a context and the sids, doi, alg must match + */ +static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2) +{ + return ((!s1 && !s2) || + (s1 && s2 && + (s1->ctx_sid == s2->ctx_sid) && + (s1->ctx_doi == s2->ctx_doi) && + (s1->ctx_alg == s2->ctx_alg))); +} +#else +static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2) +{ + return 1; +} +#endif + /* A struct encoding bundle of transformations to apply to some set of flow. * * dst->child points to the next element of bundle. @@ -878,8 +901,8 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel, - int delete); +struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, + struct xfrm_sec_ctx *ctx, int delete); struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete); void xfrm_policy_flush(void); u32 xfrm_get_acqseq(void); -- cgit v1.2.3-70-g09d2 From 89cee8b1cbb9dac40c92ef1968aea2b45f82fd18 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Dec 2005 23:14:27 -0800 Subject: [IPV4]: Safer reassembly Another spin of Herbert Xu's "safer ip reassembly" patch for 2.6.16. (The original patch is here: http://marc.theaimsgroup.com/?l=linux-netdev&m=112281936522415&w=2 and my only contribution is to have tested it.) This patch (optionally) does additional checks before accepting IP fragments, which can greatly reduce the possibility of reassembling fragments which originated from different IP datagrams. Signed-off-by: Herbert Xu Signed-off-by: Arthur Kepner Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 23 ++++++++++++ include/linux/sysctl.h | 1 + include/net/inetpeer.h | 1 + include/net/ip.h | 2 + net/ipv4/inetpeer.c | 1 + net/ipv4/ip_fragment.c | 68 +++++++++++++++++++++++++++++++++- net/ipv4/ip_output.c | 1 + net/ipv4/sysctl_net_ipv4.c | 10 +++++ 8 files changed, 106 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ebc09a159f62..2b7cf19a06ad 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -46,6 +46,29 @@ ipfrag_secret_interval - INTEGER for the hash secret) for IP fragments. Default: 600 +ipfrag_max_dist - INTEGER + ipfrag_max_dist is a non-negative integer value which defines the + maximum "disorder" which is allowed among fragments which share a + common IP source address. Note that reordering of packets is + not unusual, but if a large number of fragments arrive from a source + IP address while a particular fragment queue remains incomplete, it + probably indicates that one or more fragments belonging to that queue + have been lost. When ipfrag_max_dist is positive, an additional check + is done on fragments before they are added to a reassembly queue - if + ipfrag_max_dist (or more) fragments have arrived from a particular IP + address between additions to any IP fragment queue using that source + address, it's presumed that one or more fragments in the queue are + lost. The existing fragment queue will be dropped, and a new one + started. An ipfrag_max_dist value of zero disables this check. + + Using a very small value, e.g. 1 or 2, for ipfrag_max_dist can + result in unnecessarily dropping fragment queues when normal + reordering of packets occurs, which could lead to poor application + performance. Using a very large value, e.g. 50000, increases the + likelihood of incorrectly reassembling IP fragments that originate + from different IP datagrams, which could result in data corruption. + Default: 64 + INET peer storage: inet_peer_threshold - INTEGER diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 4be34ef8c2f7..93fa765e47d3 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -390,6 +390,7 @@ enum NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, NET_TCP_CONG_CONTROL=110, NET_TCP_ABC=111, + NET_IPV4_IPFRAG_MAX_DIST=112, }; enum { diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 7fda471002b6..0965515f40cf 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -25,6 +25,7 @@ struct inet_peer __u32 v4daddr; /* peer's address */ __u16 avl_height; __u16 ip_id_count; /* IP ID for the next packet */ + atomic_t rid; /* Frag reception counter */ __u32 tcp_ts; unsigned long tcp_ts_stamp; }; diff --git a/include/net/ip.h b/include/net/ip.h index e4563bbee6ea..4d6294ba038e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -45,6 +45,7 @@ struct inet_skb_parm #define IPSKB_TRANSLATED 2 #define IPSKB_FORWARDED 4 #define IPSKB_XFRM_TUNNEL_SIZE 8 +#define IPSKB_FRAG_COMPLETE 16 }; struct ipcm_cookie @@ -168,6 +169,7 @@ extern int sysctl_ipfrag_high_thresh; extern int sysctl_ipfrag_low_thresh; extern int sysctl_ipfrag_time; extern int sysctl_ipfrag_secret_interval; +extern int sysctl_ipfrag_max_dist; /* From inetpeer.c */ extern int inet_peer_threshold; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 2fc3fd38924f..ce5fe3f74a3d 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -401,6 +401,7 @@ struct inet_peer *inet_getpeer(__u32 daddr, int create) return NULL; n->v4daddr = daddr; atomic_set(&n->refcnt, 1); + atomic_set(&n->rid, 0); n->ip_id_count = secure_ip_id(daddr); n->tcp_ts_stamp = 0; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8ce0ce2ee48e..ce2b70ce4018 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -22,6 +22,7 @@ * Patrick McHardy : LRU queue of frag heads for evictor. */ +#include #include #include #include @@ -38,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -56,6 +58,8 @@ int sysctl_ipfrag_high_thresh = 256*1024; int sysctl_ipfrag_low_thresh = 192*1024; +int sysctl_ipfrag_max_dist = 64; + /* Important NOTE! Fragment queue must be destroyed before MSL expires. * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. */ @@ -89,8 +93,10 @@ struct ipq { spinlock_t lock; atomic_t refcnt; struct timer_list timer; /* when will this queue expire? */ - int iif; struct timeval stamp; + int iif; + unsigned int rid; + struct inet_peer *peer; }; /* Hash table. */ @@ -195,6 +201,9 @@ static void ip_frag_destroy(struct ipq *qp, int *work) BUG_TRAP(qp->last_in&COMPLETE); BUG_TRAP(del_timer(&qp->timer) == 0); + if (qp->peer) + inet_putpeer(qp->peer); + /* Release all fragment data. */ fp = qp->fragments; while (fp) { @@ -353,6 +362,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user) qp->meat = 0; qp->fragments = NULL; qp->iif = 0; + qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; /* Initialize a timer for this entry. */ init_timer(&qp->timer); @@ -398,6 +408,56 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) return ip_frag_create(hash, iph, user); } +/* Is the fragment too far ahead to be part of ipq? */ +static inline int ip_frag_too_far(struct ipq *qp) +{ + struct inet_peer *peer = qp->peer; + unsigned int max = sysctl_ipfrag_max_dist; + unsigned int start, end; + + int rc; + + if (!peer || !max) + return 0; + + start = qp->rid; + end = atomic_inc_return(&peer->rid); + qp->rid = end; + + rc = qp->fragments && (end - start) > max; + + if (rc) { + IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + } + + return rc; +} + +static int ip_frag_reinit(struct ipq *qp) +{ + struct sk_buff *fp; + + if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) { + atomic_inc(&qp->refcnt); + return -ETIMEDOUT; + } + + fp = qp->fragments; + do { + struct sk_buff *xp = fp->next; + frag_kfree_skb(fp, NULL); + fp = xp; + } while (fp); + + qp->last_in = 0; + qp->len = 0; + qp->meat = 0; + qp->fragments = NULL; + qp->iif = 0; + + return 0; +} + /* Add new segment to existing queue. */ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { @@ -408,6 +468,12 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (qp->last_in & COMPLETE) goto err; + if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && + unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) { + ipq_kill(qp); + goto err; + } + offset = ntohs(skb->nh.iph->frag_off); flags = offset & ~IP_OFFSET; offset &= IP_OFFSET; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index eba64e2bd397..2a830de3a699 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -445,6 +445,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) hlen = iph->ihl * 4; mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ + IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; /* When frag_list is given, use it. First, check its validity: * some transformers could create wrong frag_list or break existing diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 01444a02b48b..dbf82955aabe 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -22,6 +22,7 @@ extern int sysctl_ip_nonlocal_bind; #ifdef CONFIG_SYSCTL +static int zero; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -613,6 +614,15 @@ ctl_table ipv4_table[] = { .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies }, + { + .ctl_name = NET_IPV4_IPFRAG_MAX_DIST, + .procname = "ipfrag_max_dist", + .data = &sysctl_ipfrag_max_dist, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &zero + }, { .ctl_name = NET_TCP_NO_METRICS_SAVE, .procname = "tcp_no_metrics_save", -- cgit v1.2.3-70-g09d2 From 971af18bbfabb7b7c9c548da34a51e30869c08fc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:14:47 -0800 Subject: [IPV6]: Reuse inet_csk_get_port in tcp_v6_get_port Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 6 ++- net/dccp/ipv4.c | 3 +- net/ipv4/inet_connection_sock.c | 11 +++-- net/ipv4/tcp_ipv4.c | 3 +- net/ipv6/tcp_ipv6.c | 95 ++------------------------------------ 5 files changed, 21 insertions(+), 97 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b0c99060b78d..edc68e858d51 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -192,8 +192,12 @@ extern struct request_sock *inet_csk_search_req(const struct sock *sk, const __u16 rport, const __u32 raddr, const __u32 laddr); +extern int inet_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb); extern int inet_csk_get_port(struct inet_hashinfo *hashinfo, - struct sock *sk, unsigned short snum); + struct sock *sk, unsigned short snum, + int (*bind_conflict)(const struct sock *sk, + const struct inet_bind_bucket *tb)); extern struct dst_entry* inet_csk_route_req(struct sock *sk, const struct request_sock *req); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 656e13e38cfb..1ac3e30ae791 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -37,7 +37,8 @@ EXPORT_SYMBOL_GPL(dccp_hashinfo); static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) { - return inet_csk_get_port(&dccp_hashinfo, sk, snum); + return inet_csk_get_port(&dccp_hashinfo, sk, snum, + inet_csk_bind_conflict); } static void dccp_v4_hash(struct sock *sk) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 3fe021f1a566..f05b6e761102 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -37,7 +37,8 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); */ int sysctl_local_port_range[2] = { 1024, 4999 }; -static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) +int inet_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb) { const u32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; @@ -62,11 +63,15 @@ static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucke return node != NULL; } +EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); + /* Obtain a reference to a local port for the given sock, * if snum is zero it means select any available local port. */ int inet_csk_get_port(struct inet_hashinfo *hashinfo, - struct sock *sk, unsigned short snum) + struct sock *sk, unsigned short snum, + int (*bind_conflict)(const struct sock *sk, + const struct inet_bind_bucket *tb)) { struct inet_bind_hashbucket *head; struct hlist_node *node; @@ -125,7 +130,7 @@ tb_found: goto success; } else { ret = 1; - if (inet_csk_bind_conflict(sk, tb)) + if (bind_conflict(sk, tb)) goto fail_unlock; } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4d5021e1929b..2aa19c89a94a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -97,7 +97,8 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { static int tcp_v4_get_port(struct sock *sk, unsigned short snum) { - return inet_csk_get_port(&tcp_hashinfo, sk, snum); + return inet_csk_get_port(&tcp_hashinfo, sk, snum, + inet_csk_bind_conflict); } static void tcp_v4_hash(struct sock *sk) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8827389abaf7..76c8f5a2f7f3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -76,8 +76,8 @@ static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok); static struct tcp_func ipv6_mapped; static struct tcp_func ipv6_specific; -static inline int tcp_v6_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb) +int inet6_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb) { const struct sock *sk2; const struct hlist_node *node; @@ -97,97 +97,10 @@ static inline int tcp_v6_bind_conflict(const struct sock *sk, return node != NULL; } -/* Grrr, addr_type already calculated by caller, but I don't want - * to add some silly "cookie" argument to this method just for that. - * But it doesn't matter, the recalculation is in the rarest path - * this function ever takes. - */ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) { - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - struct hlist_node *node; - int ret; - - local_bh_disable(); - if (snum == 0) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover = net_random() % (high - low) + low; - - do { - head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == rover) - goto next; - break; - next: - spin_unlock(&head->lock); - if (++rover > high) - rover = low; - } while (--remaining > 0); - - /* Exhausted local port range during search? It is not - * possible for us to be holding one of the bind hash - * locks if this test triggers, because if 'remaining' - * drops to zero, we broke out of the do/while loop at - * the top level, not from the 'break;' statement. - */ - ret = 1; - if (unlikely(remaining <= 0)) - goto fail; - - /* OK, here is the one we will use. */ - snum = rover; - } else { - head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == snum) - goto tb_found; - } - tb = NULL; - goto tb_not_found; -tb_found: - if (tb && !hlist_empty(&tb->owners)) { - if (tb->fastreuse > 0 && sk->sk_reuse && - sk->sk_state != TCP_LISTEN) { - goto success; - } else { - ret = 1; - if (tcp_v6_bind_conflict(sk, tb)) - goto fail_unlock; - } - } -tb_not_found: - ret = 1; - if (tb == NULL) { - tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum); - if (tb == NULL) - goto fail_unlock; - } - if (hlist_empty(&tb->owners)) { - if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) - tb->fastreuse = 1; - else - tb->fastreuse = 0; - } else if (tb->fastreuse && - (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) - tb->fastreuse = 0; - -success: - if (!inet_csk(sk)->icsk_bind_hash) - inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); - ret = 0; - -fail_unlock: - spin_unlock(&head->lock); -fail: - local_bh_enable(); - return ret; + return inet_csk_get_port(&tcp_hashinfo, sk, snum, + inet6_csk_bind_conflict); } static __inline__ void __tcp_v6_hash(struct sock *sk) -- cgit v1.2.3-70-g09d2 From 90b19d31695371bd3ed256d4c9e280861cd6ae7e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:15:01 -0800 Subject: [IPV6]: Generalise __tcp_v6_hash, renaming it to __inet6_hash Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 26 ++++++++++++++++++++++++++ net/ipv6/tcp_ipv6.c | 34 ++++------------------------------ 2 files changed, 30 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 5a2beed5a770..a4a204f99ea6 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -48,6 +48,32 @@ static inline int inet6_sk_ehashfn(const struct sock *sk) return inet6_ehashfn(laddr, lport, faddr, fport); } +static inline void __inet6_hash(struct inet_hashinfo *hashinfo, + struct sock *sk) +{ + struct hlist_head *list; + rwlock_t *lock; + + BUG_TRAP(sk_unhashed(sk)); + + if (sk->sk_state == TCP_LISTEN) { + list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + lock = &hashinfo->lhash_lock; + inet_listen_wlock(hashinfo); + } else { + unsigned int hash; + sk->sk_hash = hash = inet6_sk_ehashfn(sk); + hash &= (hashinfo->ehash_size - 1); + list = &hashinfo->ehash[hash].chain; + lock = &hashinfo->ehash[hash].lock; + write_lock(lock); + } + + __sk_add_node(sk, list); + sock_prot_inc_use(sk->sk_prot); + write_unlock(lock); +} + /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 76c8f5a2f7f3..bf41f84d6692 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -103,32 +103,6 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) inet6_csk_bind_conflict); } -static __inline__ void __tcp_v6_hash(struct sock *sk) -{ - struct hlist_head *list; - rwlock_t *lock; - - BUG_TRAP(sk_unhashed(sk)); - - if (sk->sk_state == TCP_LISTEN) { - list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &tcp_hashinfo.lhash_lock; - inet_listen_wlock(&tcp_hashinfo); - } else { - unsigned int hash; - sk->sk_hash = hash = inet6_sk_ehashfn(sk); - hash &= (tcp_hashinfo.ehash_size - 1); - list = &tcp_hashinfo.ehash[hash].chain; - lock = &tcp_hashinfo.ehash[hash].lock; - write_lock(lock); - } - - __sk_add_node(sk, list); - sock_prot_inc_use(sk->sk_prot); - write_unlock(lock); -} - - static void tcp_v6_hash(struct sock *sk) { if (sk->sk_state != TCP_CLOSE) { @@ -139,7 +113,7 @@ static void tcp_v6_hash(struct sock *sk) return; } local_bh_disable(); - __tcp_v6_hash(sk); + __inet6_hash(&tcp_hashinfo, sk); local_bh_enable(); } } @@ -374,7 +348,7 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->sport = htons(port); - __tcp_v6_hash(sk); + __inet6_hash(&tcp_hashinfo, sk); } spin_unlock(&head->lock); @@ -392,7 +366,7 @@ ok: spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __tcp_v6_hash(sk); + __inet6_hash(&tcp_hashinfo, sk); spin_unlock_bh(&head->lock); return 0; } else { @@ -1295,7 +1269,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; - __tcp_v6_hash(newsk); + __inet6_hash(&tcp_hashinfo, newsk); inet_inherit_port(&tcp_hashinfo, sk, newsk); return newsk; -- cgit v1.2.3-70-g09d2 From c2977c2213993bff51911f4117281b31c4612591 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:15:12 -0800 Subject: [ICSK]: make inet_csk_reqsk_queue_hash_add timeout arg unsigned long Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 2 +- net/ipv4/inet_connection_sock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index edc68e858d51..ccc81a1c550c 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -211,7 +211,7 @@ static inline void inet_csk_reqsk_queue_add(struct sock *sk, extern void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, - const unsigned timeout); + unsigned long timeout); static inline void inet_csk_reqsk_queue_removed(struct sock *sk, struct request_sock *req) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f05b6e761102..e2bf508ed770 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -385,7 +385,7 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, EXPORT_SYMBOL_GPL(inet_csk_search_req); void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, - const unsigned timeout) + unsigned long timeout) { struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; -- cgit v1.2.3-70-g09d2 From 8129765ac07c2455c927051e3a8b048b619b56ee Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:15:24 -0800 Subject: [IPV6]: Generalise tcp_v6_search_req & tcp_v6_synq_add More work is needed tho to introduce inet6_request_sock from tcp6_request_sock, in the same layout considerations as ipv6_pinfo in inet_sock, next changeset will do that. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet6_connection_sock.h | 31 ++++++++++++ include/net/request_sock.h | 2 +- net/ipv6/Makefile | 3 +- net/ipv6/inet6_connection_sock.c | 96 +++++++++++++++++++++++++++++++++++++ net/ipv6/tcp_ipv6.c | 78 +++--------------------------- 5 files changed, 137 insertions(+), 73 deletions(-) create mode 100644 include/net/inet6_connection_sock.h create mode 100644 net/ipv6/inet6_connection_sock.c (limited to 'include') diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h new file mode 100644 index 000000000000..aa30ebde70dc --- /dev/null +++ b/include/net/inet6_connection_sock.h @@ -0,0 +1,31 @@ +/* + * NET Generic infrastructure for INET6 connection oriented protocols. + * + * Authors: Many people, see the TCPv6 sources + * + * From code originally in TCPv6 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET6_CONNECTION_SOCK_H +#define _INET6_CONNECTION_SOCK_H + +#include + +struct sock; +struct request_sock; + +extern struct request_sock *inet6_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, + const struct in6_addr *raddr, + const struct in6_addr *laddr, + const int iif); + +extern void inet6_csk_reqsk_queue_hash_add(struct sock *sk, + struct request_sock *req, + const unsigned long timeout); +#endif /* _INET6_CONNECTION_SOCK_H */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index b52cc52ffe39..11641c9384f7 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -244,7 +244,7 @@ static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, u32 hash, struct request_sock *req, - unsigned timeout) + unsigned long timeout) { struct listen_sock *lopt = queue->listen_opt; diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 6460eec834b7..9601fd7f9d66 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -8,7 +8,8 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ - ip6_flowlabel.o ipv6_syms.o netfilter.o + ip6_flowlabel.o ipv6_syms.o netfilter.o \ + inet6_connection_sock.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c new file mode 100644 index 000000000000..04ff44344f90 --- /dev/null +++ b/net/ipv6/inet6_connection_sock.c @@ -0,0 +1,96 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Support for INET6 connection oriented protocols. + * + * Authors: See the TCPv6 sources + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or(at your option) any later version. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * request_sock (formerly open request) hash tables. + */ +static u32 inet6_synq_hash(const struct in6_addr *raddr, const u16 rport, + const u32 rnd, const u16 synq_hsize) +{ + u32 a = raddr->s6_addr32[0]; + u32 b = raddr->s6_addr32[1]; + u32 c = raddr->s6_addr32[2]; + + a += JHASH_GOLDEN_RATIO; + b += JHASH_GOLDEN_RATIO; + c += rnd; + __jhash_mix(a, b, c); + + a += raddr->s6_addr32[3]; + b += (u32)rport; + __jhash_mix(a, b, c); + + return c & (synq_hsize - 1); +} + +struct request_sock *inet6_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, + const struct in6_addr *raddr, + const struct in6_addr *laddr, + const int iif) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + struct request_sock *req, **prev; + + for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport, + lopt->hash_rnd, + lopt->nr_table_entries)]; + (req = *prev) != NULL; + prev = &req->dl_next) { + const struct tcp6_request_sock *treq = tcp6_rsk(req); + + if (inet_rsk(req)->rmt_port == rport && + req->rsk_ops->family == AF_INET6 && + ipv6_addr_equal(&treq->rmt_addr, raddr) && + ipv6_addr_equal(&treq->loc_addr, laddr) && + (!treq->iif || treq->iif == iif)) { + BUG_TRAP(req->sk == NULL); + *prevp = prev; + return req; + } + } + + return NULL; +} + +EXPORT_SYMBOL_GPL(inet6_csk_search_req); + +void inet6_csk_reqsk_queue_hash_add(struct sock *sk, + struct request_sock *req, + const unsigned long timeout) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + const u32 h = inet6_synq_hash(&tcp6_rsk(req)->rmt_addr, + inet_rsk(req)->rmt_port, + lopt->hash_rnd, lopt->nr_table_entries); + + reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); + inet_csk_reqsk_queue_added(sk, timeout); +} + +EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bf41f84d6692..5a10d30cec4a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -118,60 +119,6 @@ static void tcp_v6_hash(struct sock *sk) } } -/* - * Open request hash tables. - */ - -static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd) -{ - u32 a, b, c; - - a = raddr->s6_addr32[0]; - b = raddr->s6_addr32[1]; - c = raddr->s6_addr32[2]; - - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += rnd; - __jhash_mix(a, b, c); - - a += raddr->s6_addr32[3]; - b += (u32) rport; - __jhash_mix(a, b, c); - - return c & (TCP_SYNQ_HSIZE - 1); -} - -static struct request_sock *tcp_v6_search_req(const struct sock *sk, - struct request_sock ***prevp, - __u16 rport, - struct in6_addr *raddr, - struct in6_addr *laddr, - int iif) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req, **prev; - - for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; - (req = *prev) != NULL; - prev = &req->dl_next) { - const struct tcp6_request_sock *treq = tcp6_rsk(req); - - if (inet_rsk(req)->rmt_port == rport && - req->rsk_ops->family == AF_INET6 && - ipv6_addr_equal(&treq->rmt_addr, raddr) && - ipv6_addr_equal(&treq->loc_addr, laddr) && - (!treq->iif || treq->iif == iif)) { - BUG_TRAP(req->sk == NULL); - *prevp = prev; - return req; - } - } - - return NULL; -} - static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len, struct in6_addr *saddr, struct in6_addr *daddr, @@ -662,8 +609,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sock_owned_by_user(sk)) goto out; - req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr, - &hdr->saddr, inet6_iif(skb)); + req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, + &hdr->saddr, inet6_iif(skb)); if (!req) goto out; @@ -978,8 +925,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) struct sock *nsk; /* Find possible connection requests. */ - req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, inet6_iif(skb)); + req = inet6_csk_search_req(sk, &prev, th->source, + &skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, inet6_iif(skb)); if (req) return tcp_check_req(sk, skb, req, prev); @@ -1003,17 +951,6 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) return sk; } -static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); - - reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); - inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); -} - - /* FIXME: this is substantially similar to the ipv4 code. * Can some kind of merge be done? -- erics */ @@ -1083,8 +1020,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (tcp_v6_send_synack(sk, req, NULL)) goto drop; - tcp_v6_synq_add(sk, req); - + inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); return 0; drop: -- cgit v1.2.3-70-g09d2 From ca304b6104ffdd120bb6687a88a0625e58bc71cd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:15:40 -0800 Subject: [IPV6]: Introduce inet6_rsk() And inet6_rsk_offset in inet_request_sock, for the same reasons as inet_sock's pinfo6 member. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ip.h | 4 ++++ include/linux/ipv6.h | 39 +++++++++++++++++++++++++++++++++------ net/ipv4/inet_diag.c | 8 ++++---- net/ipv6/inet6_connection_sock.c | 4 ++-- net/ipv6/tcp_ipv6.c | 19 +++++++++---------- 5 files changed, 52 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/ip.h b/include/linux/ip.h index 33e8a19a1a0f..5a560daeade5 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -110,6 +110,10 @@ struct ip_options { struct inet_request_sock { struct request_sock req; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + u16 inet6_rsk_offset; + /* 2 bytes hole, try to pack */ +#endif u32 loc_addr; u32 rmt_addr; u16 rmt_port; diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e0b922785d98..7d3e86d9576e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -199,18 +199,17 @@ static inline int inet6_iif(const struct sk_buff *skb) return IP6CB(skb)->iif; } -struct tcp6_request_sock { - struct tcp_request_sock req; +struct inet6_request_sock { struct in6_addr loc_addr; struct in6_addr rmt_addr; struct sk_buff *pktopts; int iif; }; -static inline struct tcp6_request_sock *tcp6_rsk(const struct request_sock *sk) -{ - return (struct tcp6_request_sock *)sk; -} +struct tcp6_request_sock { + struct tcp_request_sock tcp6rsk_tcp; + struct inet6_request_sock tcp6rsk_inet6; +}; /** * struct ipv6_pinfo - ipv6 private area @@ -304,6 +303,28 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) return inet_sk(__sk)->pinet6; } +static inline struct inet6_request_sock * + inet6_rsk(const struct request_sock *rsk) +{ + return (struct inet6_request_sock *)(((u8 *)rsk) + + inet_rsk(rsk)->inet6_rsk_offset); +} + +static inline u32 inet6_rsk_offset(struct request_sock *rsk) +{ + return rsk->rsk_ops->obj_size - sizeof(struct inet6_request_sock); +} + +static inline struct request_sock *inet6_reqsk_alloc(struct request_sock_ops *ops) +{ + struct request_sock *req = reqsk_alloc(ops); + + if (req != NULL) + inet_rsk(req)->inet6_rsk_offset = inet6_rsk_offset(req); + + return req; +} + static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return (struct raw6_sock *)sk; @@ -361,6 +382,12 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) return NULL; } +static inline struct inet6_request_sock * + inet6_rsk(const struct request_sock *rsk) +{ + return NULL; +} + static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return NULL; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 39061ed53cfd..3ce73b141d7e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -489,9 +489,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->idiag_family == AF_INET6) { ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &tcp6_rsk(req)->loc_addr); + &inet6_rsk(req)->loc_addr); ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &tcp6_rsk(req)->rmt_addr); + &inet6_rsk(req)->rmt_addr); } #endif nlh->nlmsg_len = skb->tail - b; @@ -553,13 +553,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, entry.saddr = #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? - tcp6_rsk(req)->loc_addr.s6_addr32 : + inet6_rsk(req)->loc_addr.s6_addr32 : #endif &ireq->loc_addr; entry.daddr = #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? - tcp6_rsk(req)->rmt_addr.s6_addr32 : + inet6_rsk(req)->rmt_addr.s6_addr32 : #endif &ireq->rmt_addr; entry.dport = ntohs(ireq->rmt_port); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 04ff44344f90..fe874eeaa40c 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -61,7 +61,7 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, lopt->nr_table_entries)]; (req = *prev) != NULL; prev = &req->dl_next) { - const struct tcp6_request_sock *treq = tcp6_rsk(req); + const struct inet6_request_sock *treq = inet6_rsk(req); if (inet_rsk(req)->rmt_port == rport && req->rsk_ops->family == AF_INET6 && @@ -85,7 +85,7 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk, { struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = inet6_synq_hash(&tcp6_rsk(req)->rmt_addr, + const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd, lopt->nr_table_entries); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a10d30cec4a..c2472d771664 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -656,7 +656,7 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct dst_entry *dst) { - struct tcp6_request_sock *treq = tcp6_rsk(req); + struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff * skb; struct ipv6_txoptions *opt = NULL; @@ -722,8 +722,8 @@ done: static void tcp_v6_reqsk_destructor(struct request_sock *req) { - if (tcp6_rsk(req)->pktopts) - kfree_skb(tcp6_rsk(req)->pktopts); + if (inet6_rsk(req)->pktopts) + kfree_skb(inet6_rsk(req)->pktopts); } static struct request_sock_ops tcp6_request_sock_ops = { @@ -956,7 +956,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp6_request_sock *treq; + struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_options_received tmp_opt; struct tcp_sock *tp = tcp_sk(sk); @@ -981,7 +981,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = reqsk_alloc(&tcp6_request_sock_ops); + req = inet6_reqsk_alloc(&tcp6_request_sock_ops); if (req == NULL) goto drop; @@ -994,7 +994,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb); - treq = tcp6_rsk(req); + treq = inet6_rsk(req); ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr); ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr); TCP_ECN_create_request(req, skb->h.th); @@ -1035,7 +1035,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { - struct tcp6_request_sock *treq = tcp6_rsk(req); + struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; @@ -1723,14 +1723,13 @@ static int tcp_v6_destroy_sock(struct sock *sk) static void get_openreq6(struct seq_file *seq, struct sock *sk, struct request_sock *req, int i, int uid) { - struct in6_addr *dest, *src; int ttd = req->expires - jiffies; + struct in6_addr *src = &inet6_rsk(req)->loc_addr; + struct in6_addr *dest = &inet6_rsk(req)->rmt_addr; if (ttd < 0) ttd = 0; - src = &tcp6_rsk(req)->loc_addr; - dest = &tcp6_rsk(req)->rmt_addr; seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n", -- cgit v1.2.3-70-g09d2 From 8292a17a399ffb7c5c8b083db4ad994e090055f7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:15:52 -0800 Subject: [ICSK]: Rename struct tcp_func to struct inet_connection_sock_af_ops And move it to struct inet_connection_sock. DCCP will use it in the upcoming changesets. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 -- include/net/inet_connection_sock.h | 26 ++++++++++++++++++++ include/net/tcp.h | 50 +------------------------------------- include/net/transp_v6.h | 2 +- net/ipv4/syncookies.c | 4 +-- net/ipv4/tcp.c | 8 +++--- net/ipv4/tcp_input.c | 11 +++++---- net/ipv4/tcp_ipv4.c | 11 ++++----- net/ipv4/tcp_minisocks.c | 8 +++--- net/ipv4/tcp_output.c | 17 ++++++------- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/tcp_ipv6.c | 28 ++++++++++----------- 12 files changed, 71 insertions(+), 98 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 0e1da6602e05..4e1434007f44 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -295,8 +295,6 @@ struct tcp_sock { struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ - struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */ - __u32 rcv_wnd; /* Current receiver window */ __u32 rcv_wup; /* rcv_nxt on last window update sent */ __u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ccc81a1c550c..9e20d201e951 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -15,6 +15,7 @@ #ifndef _INET_CONNECTION_SOCK_H #define _INET_CONNECTION_SOCK_H +#include #include #include #include @@ -29,6 +30,29 @@ struct inet_bind_bucket; struct inet_hashinfo; struct tcp_congestion_ops; +/* + * Pointers to address related TCP functions + * (i.e. things that depend on the address family) + */ +struct inet_connection_sock_af_ops { + int (*queue_xmit)(struct sk_buff *skb, int ipfragok); + void (*send_check)(struct sock *sk, int len, + struct sk_buff *skb); + int (*rebuild_header)(struct sock *sk); + int (*conn_request)(struct sock *sk, struct sk_buff *skb); + struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); + int (*remember_stamp)(struct sock *sk); + __u16 net_header_len; + int (*setsockopt)(struct sock *sk, int level, int optname, + char __user *optval, int optlen); + int (*getsockopt)(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); + void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); + int sockaddr_len; +}; + /** inet_connection_sock - INET connection oriented sock * * @icsk_accept_queue: FIFO of established children @@ -37,6 +61,7 @@ struct tcp_congestion_ops; * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: Retransmit timeout * @icsk_ca_ops Pluggable congestion control hook + * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@ -55,6 +80,7 @@ struct inet_connection_sock { struct timer_list icsk_delack_timer; __u32 icsk_rto; struct tcp_congestion_ops *icsk_ca_ops; + struct inet_connection_sock_af_ops *icsk_af_ops; __u8 icsk_ca_state; __u8 icsk_retransmits; __u8 icsk_pending; diff --git a/include/net/tcp.h b/include/net/tcp.h index d78025f9fbea..83b117a25c2a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -224,53 +224,6 @@ extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; extern int tcp_memory_pressure; -/* - * Pointers to address related TCP functions - * (i.e. things that depend on the address family) - */ - -struct tcp_func { - int (*queue_xmit) (struct sk_buff *skb, - int ipfragok); - - void (*send_check) (struct sock *sk, - struct tcphdr *th, - int len, - struct sk_buff *skb); - - int (*rebuild_header) (struct sock *sk); - - int (*conn_request) (struct sock *sk, - struct sk_buff *skb); - - struct sock * (*syn_recv_sock) (struct sock *sk, - struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst); - - int (*remember_stamp) (struct sock *sk); - - __u16 net_header_len; - - int (*setsockopt) (struct sock *sk, - int level, - int optname, - char __user *optval, - int optlen); - - int (*getsockopt) (struct sock *sk, - int level, - int optname, - char __user *optval, - int __user *optlen); - - - void (*addr2sockaddr) (struct sock *sk, - struct sockaddr *); - - int sockaddr_len; -}; - /* * The next routines deal with comparing 32 bit unsigned ints * and worry about wraparound (automatic with unsigned arithmetic). @@ -405,8 +358,7 @@ extern void tcp_parse_options(struct sk_buff *skb, * TCP v4 functions exported for the inet6 API */ -extern void tcp_v4_send_check(struct sock *sk, - struct tcphdr *th, int len, +extern void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); extern int tcp_v4_conn_request(struct sock *sk, diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 4e86f2de6638..61f724c1036f 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -44,7 +44,7 @@ extern int datagram_send_ctl(struct msghdr *msg, /* * address family specific functions */ -extern struct tcp_func ipv4_specific; +extern struct inet_connection_sock_af_ops ipv4_specific; extern int inet6_destroy_sock(struct sock *sk); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index a34e60ea48a1..e20be3331f67 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -173,10 +173,10 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; - child = tp->af_specific->syn_recv_sock(sk, skb, req, dst); + child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); if (child) inet_csk_reqsk_queue_add(sk, req, child); else diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ef98b14ac56d..eacfe6a3442c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1696,8 +1696,8 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int err = 0; if (level != SOL_TCP) - return tp->af_specific->setsockopt(sk, level, optname, - optval, optlen); + return icsk->icsk_af_ops->setsockopt(sk, level, optname, + optval, optlen); /* This is a string value all the others are int's */ if (optname == TCP_CONGESTION) { @@ -1939,8 +1939,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int val, len; if (level != SOL_TCP) - return tp->af_specific->getsockopt(sk, level, optname, - optval, optlen); + return icsk->icsk_af_ops->getsockopt(sk, level, optname, + optval, optlen); if (get_user(len, optlen)) return -EFAULT; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bf2e23086bce..7de6184d4bd8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4071,8 +4071,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, mb(); tcp_set_state(sk, TCP_ESTABLISHED); + icsk = inet_csk(sk); + /* Make sure socket is routed, for correct metrics. */ - tp->af_specific->rebuild_header(sk); + icsk->icsk_af_ops->rebuild_header(sk); tcp_init_metrics(sk); @@ -4098,8 +4100,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, sk_wake_async(sk, 0, POLL_OUT); } - icsk = inet_csk(sk); - if (sk->sk_write_pending || icsk->icsk_accept_queue.rskq_defer_accept || icsk->icsk_ack.pingpong) { @@ -4220,6 +4220,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); int queued = 0; tp->rx_opt.saw_tstamp = 0; @@ -4236,7 +4237,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; if(th->syn) { - if(tp->af_specific->conn_request(sk, skb) < 0) + if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) return 1; /* Now we have several options: In theory there is @@ -4349,7 +4350,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* Make sure socket is routed, for * correct metrics. */ - tp->af_specific->rebuild_header(sk); + icsk->icsk_af_ops->rebuild_header(sk); tcp_init_metrics(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2aa19c89a94a..704cf2105795 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -86,8 +86,7 @@ int sysctl_tcp_low_latency; /* Socket used for sending RSTs */ static struct socket *tcp_socket; -void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, - struct sk_buff *skb); +void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { .lhash_lock = RW_LOCK_UNLOCKED, @@ -645,10 +644,10 @@ out: } /* This routine computes an IPv4 TCP checksum. */ -void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, - struct sk_buff *skb) +void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct inet_sock *inet = inet_sk(sk); + struct tcphdr *th = skb->h.th; if (skb->ip_summed == CHECKSUM_HW) { th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); @@ -1383,7 +1382,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) return 0; } -struct tcp_func ipv4_specific = { +struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, @@ -1434,7 +1433,7 @@ static int tcp_v4_init_sock(struct sock *sk) sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); - tp->af_specific = &ipv4_specific; + icsk->icsk_af_ops = &ipv4_specific; sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1b66a2ac4321..9c029683a626 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -274,18 +274,18 @@ kill: void tcp_time_wait(struct sock *sk, int state, int timeo) { struct inet_timewait_sock *tw = NULL; + const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); int recycle_ok = 0; if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) - recycle_ok = tp->af_specific->remember_stamp(sk); + recycle_ok = icsk->icsk_af_ops->remember_stamp(sk); if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) tw = inet_twsk_alloc(sk, state); if (tw != NULL) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); - const struct inet_connection_sock *icsk = inet_csk(sk); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; @@ -456,7 +456,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, struct request_sock **prev) { struct tcphdr *th = skb->h.th; - struct tcp_sock *tp = tcp_sk(sk); u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); int paws_reject = 0; struct tcp_options_received tmp_opt; @@ -613,7 +612,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, * ESTABLISHED STATE. If it will be dropped after * socket is created, wait for troubles. */ - child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, + req, NULL); if (child == NULL) goto listen_overflow; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b7325e0b406a..af1946c52c37 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -371,7 +371,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_ECN_send(sk, tp, skb, tcp_header_size); } - tp->af_specific->send_check(sk, th, skb->len, skb); + icsk->icsk_af_ops->send_check(sk, skb->len, skb); if (likely(tcb->flags & TCPCB_FLAG_ACK)) tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); @@ -381,7 +381,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_INC_STATS(TCP_MIB_OUTSEGS); - err = tp->af_specific->queue_xmit(skb, 0); + err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (unlikely(err <= 0)) return err; @@ -638,12 +638,11 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) { struct tcp_sock *tp = tcp_sk(sk); - int mss_now; - /* Calculate base mss without TCP options: It is MMS_S - sizeof(tcphdr) of rfc1122 */ - mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr); + int mss_now = (pmtu - inet_csk(sk)->icsk_af_ops->net_header_len - + sizeof(struct tcphdr)); /* Clamp it (mss_clamp does not include tcp options) */ if (mss_now > tp->rx_opt.mss_clamp) @@ -705,9 +704,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) xmit_size_goal = mss_now; if (doing_tso) { - xmit_size_goal = 65535 - - tp->af_specific->net_header_len - - tp->ext_header_len - tp->tcp_header_len; + xmit_size_goal = (65535 - + inet_csk(sk)->icsk_af_ops->net_header_len - + tp->ext_header_len - tp->tcp_header_len); if (tp->max_window && (xmit_size_goal > (tp->max_window >> 1))) @@ -1422,7 +1421,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) (sysctl_tcp_retrans_collapse != 0)) tcp_retrans_try_collapse(sk, skb, cur_mss); - if(tp->af_specific->rebuild_header(sk)) + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) return -EHOSTUNREACH; /* Routing failure or similar. */ /* Some Solaris stacks overoptimize and ignore the FIN on a diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3620718defe6..b6b63fa8454c 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -170,7 +170,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inc_use(&tcp_prot); local_bh_enable(); sk->sk_prot = &tcp_prot; - tp->af_specific = &ipv4_specific; + inet_csk(sk)->icsk_af_ops = &ipv4_specific; sk->sk_socket->ops = &inet_stream_ops; sk->sk_family = PF_INET; tcp_sync_mss(sk, tp->pmtu_cookie); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c2472d771664..8ce8a1359d2b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -68,14 +68,14 @@ static void tcp_v6_send_reset(struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req); -static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, +static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok); -static struct tcp_func ipv6_mapped; -static struct tcp_func ipv6_specific; +static struct inet_connection_sock_af_ops ipv6_mapped; +static struct inet_connection_sock_af_ops ipv6_specific; int inet6_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) @@ -107,9 +107,7 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) static void tcp_v6_hash(struct sock *sk) { if (sk->sk_state != TCP_CLOSE) { - struct tcp_sock *tp = tcp_sk(sk); - - if (tp->af_specific == &ipv6_mapped) { + if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) { tcp_prot.hash(sk); return; } @@ -417,14 +415,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - tp->af_specific = &ipv6_mapped; + inet_csk(sk)->icsk_af_ops = &ipv6_mapped; sk->sk_backlog_rcv = tcp_v4_do_rcv; err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { tp->ext_header_len = exthdrlen; - tp->af_specific = &ipv6_specific; + inet_csk(sk)->icsk_af_ops = &ipv6_specific; sk->sk_backlog_rcv = tcp_v6_do_rcv; goto failure; } else { @@ -751,10 +749,10 @@ static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) } -static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, - struct sk_buff *skb) +static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); + struct tcphdr *th = skb->h.th; if (skb->ip_summed == CHECKSUM_HW) { th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); @@ -1070,7 +1068,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); - newtp->af_specific = &ipv6_mapped; + inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; newsk->sk_backlog_rcv = tcp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; @@ -1084,7 +1082,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ /* It is tricky place. Until this moment IPv4 tcp - worked with IPv6 af_tcp.af_specific. + worked with IPv6 icsk.icsk_af_ops. Sync it now. */ tcp_sync_mss(newsk, newtp->pmtu_cookie); @@ -1631,7 +1629,7 @@ static int tcp_v6_remember_stamp(struct sock *sk) return 0; } -static struct tcp_func ipv6_specific = { +static struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = tcp_v6_xmit, .send_check = tcp_v6_send_check, .rebuild_header = tcp_v6_rebuild_header, @@ -1650,7 +1648,7 @@ static struct tcp_func ipv6_specific = { * TCP over IPv4 via INET6 API */ -static struct tcp_func ipv6_mapped = { +static struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, @@ -1700,7 +1698,7 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_state = TCP_CLOSE; - tp->af_specific = &ipv6_specific; + icsk->icsk_af_ops = &ipv6_specific; icsk->icsk_ca_ops = &tcp_init_congestion_ops; sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); -- cgit v1.2.3-70-g09d2 From af05dc9394feb193d221bc9d4c6db768facb4b40 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:16:04 -0800 Subject: [ICSK]: Move v4_addr2sockaddr from TCP to icsk Renaming it to inet_csk_addr2sockaddr. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 2 ++ net/ipv4/inet_connection_sock.c | 12 ++++++++++++ net/ipv4/tcp_ipv4.c | 12 +----------- 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 9e20d201e951..e50e2b890c6d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -303,4 +303,6 @@ static inline unsigned int inet_csk_listen_poll(const struct sock *sk) extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries); extern void inet_csk_listen_stop(struct sock *sk); +extern void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e2bf508ed770..ae20281d8deb 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -636,3 +636,15 @@ void inet_csk_listen_stop(struct sock *sk) } EXPORT_SYMBOL_GPL(inet_csk_listen_stop); + +void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; + const struct inet_sock *inet = inet_sk(sk); + + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = inet->daddr; + sin->sin_port = inet->dport; +} + +EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 704cf2105795..0b5ab04d3c5a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1314,16 +1314,6 @@ do_time_wait: goto discard_it; } -static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) -{ - struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; - struct inet_sock *inet = inet_sk(sk); - - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = inet->daddr; - sin->sin_port = inet->dport; -} - /* VJ's idea. Save last timestamp seen from this destination * and hold it at least for normal timewait interval to use for duplicate * segment detection in subsequent connections, before they enter synchronized @@ -1392,7 +1382,7 @@ struct inet_connection_sock_af_ops ipv4_specific = { .net_header_len = sizeof(struct iphdr), .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, - .addr2sockaddr = v4_addr2sockaddr, + .addr2sockaddr = inet_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in), }; -- cgit v1.2.3-70-g09d2 From 3305b80c214c642b89cd5c21af83bc91ec13f8bd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Dec 2005 23:16:37 -0800 Subject: [IP]: Simplify and consolidate MSG_PEEK error handling When a packet is obtained from skb_recv_datagram with MSG_PEEK enabled it is left on the socket receive queue. This means that when we detect a checksum error we have to be careful when trying to free the packet as someone could have dequeued it in the time being. Currently this delicate logic is duplicated three times between UDPv4, UDPv6 and RAWv6. This patch moves them into a one place and simplifies the code somewhat. This is based on a suggestion by Eric Dumazet. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ net/core/datagram.c | 36 ++++++++++++++++++++++++++++++++++++ net/ipv4/udp.c | 15 +-------------- net/ipv6/raw.c | 16 +++------------- net/ipv6/udp.c | 16 ++-------------- 5 files changed, 44 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8c5d6001a923..97f6580ce039 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1239,6 +1239,8 @@ extern int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, struct iovec *iov); extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb); +extern void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, + unsigned int flags); extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum); extern int skb_copy_bits(const struct sk_buff *skb, int offset, diff --git a/net/core/datagram.c b/net/core/datagram.c index 1bcfef51ac58..f8d322e1ea92 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -199,6 +200,41 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } +/** + * skb_kill_datagram - Free a datagram skbuff forcibly + * @sk: socket + * @skb: datagram skbuff + * @flags: MSG_ flags + * + * This function frees a datagram skbuff that was received by + * skb_recv_datagram. The flags argument must match the one + * used for skb_recv_datagram. + * + * If the MSG_PEEK flag is set, and the packet is still on the + * receive queue of the socket, it will be taken off the queue + * before it is freed. + * + * This function currently only disables BH when acquiring the + * sk_receive_queue lock. Therefore it must not be used in a + * context where that lock is acquired in an IRQ context. + */ + +void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) +{ + if (flags & MSG_PEEK) { + spin_lock_bh(&sk->sk_receive_queue.lock); + if (skb == skb_peek(&sk->sk_receive_queue)) { + __skb_unlink(skb, &sk->sk_receive_queue); + atomic_dec(&skb->users); + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + } + + kfree_skb(skb); +} + +EXPORT_SYMBOL(skb_kill_datagram); + /** * skb_copy_datagram_iovec - Copy a datagram to an iovec. * @skb: buffer to copy diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2422a5f7195d..012c4621e40a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -846,20 +846,7 @@ out: csum_copy_err: UDP_INC_STATS_BH(UDP_MIB_INERRORS); - /* Clear queue. */ - if (flags&MSG_PEEK) { - int clear = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - clear = 1; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - if (clear) - kfree_skb(skb); - } - - skb_free_datagram(sk, skb); + skb_kill_datagram(sk, skb, flags); if (noblock) return -EAGAIN; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a66900cda2af..66f1d12ea578 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -433,25 +434,14 @@ out: return err; csum_copy_err: - /* Clear queue. */ - if (flags&MSG_PEEK) { - int clear = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - clear = 1; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - if (clear) - kfree_skb(skb); - } + skb_kill_datagram(sk, skb, flags); /* Error for blocking case is chosen to masquerade as some normal condition. */ err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; /* FIXME: increment a raw6 drops counter here */ - goto out_free; + goto out; } static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5cc8731eb55b..d8538dcea813 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -300,20 +301,7 @@ out: return err; csum_copy_err: - /* Clear queue. */ - if (flags&MSG_PEEK) { - int clear = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - clear = 1; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - if (clear) - kfree_skb(skb); - } - - skb_free_datagram(sk, skb); + skb_kill_datagram(sk, skb, flags); if (flags & MSG_DONTWAIT) { UDP6_INC_STATS_USER(UDP_MIB_INERRORS); -- cgit v1.2.3-70-g09d2 From c1cbe4b7ad0bc4b1d98ea708a3fecb7362aa4088 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Tue, 13 Dec 2005 23:22:19 -0800 Subject: [NET]: Avoid atomic xchg() for non-error case It also looks like there were 2 places where the test on sk_err was missing from the event wait logic (in sk_stream_wait_connect and sk_stream_wait_memory), while the rest of the sock_error() users look to be doing the right thing. This version of the patch fixes those, and cleans up a few places that were testing ->sk_err directly. Signed-off-by: Benjamin LaHaise Signed-off-by: David S. Miller --- include/net/sock.h | 5 ++++- net/bluetooth/af_bluetooth.c | 5 ++--- net/bluetooth/l2cap.c | 5 +++-- net/bluetooth/sco.c | 5 +++-- net/core/stream.c | 10 +++++++--- net/irda/af_irda.c | 5 +++-- net/llc/af_llc.c | 5 ++--- 7 files changed, 24 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 982b4ecd187b..0fbae85c6d55 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1166,7 +1166,10 @@ static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) static inline int sock_error(struct sock *sk) { - int err = xchg(&sk->sk_err, 0); + int err; + if (likely(!sk->sk_err)) + return 0; + err = xchg(&sk->sk_err, 0); return -err; } diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index ea616e3fc98e..fb031fe9be9e 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -287,10 +287,9 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) timeo = schedule_timeout(timeo); lock_sock(sk); - if (sk->sk_err) { - err = sock_error(sk); + err = sock_error(sk); + if (err) break; - } } set_current_state(TASK_RUNNING); remove_wait_queue(sk->sk_sleep, &wait); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index e3bb11ca4235..95f33cc7a24e 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -767,8 +767,9 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms BT_DBG("sock %p, sk %p", sock, sk); - if (sk->sk_err) - return sock_error(sk); + err = sock_error(sk); + if (err) + return err; if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 9cb00dc6c08c..648181430699 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -637,8 +637,9 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock, BT_DBG("sock %p, sk %p", sock, sk); - if (sk->sk_err) - return sock_error(sk); + err = sock_error(sk); + if (err) + return err; if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; diff --git a/net/core/stream.c b/net/core/stream.c index 15bfd03e8024..35e25259fd95 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -55,8 +55,9 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) int done; do { - if (sk->sk_err) - return sock_error(sk); + int err = sock_error(sk); + if (err) + return err; if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) return -EPIPE; if (!*timeo_p) @@ -67,6 +68,7 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); sk->sk_write_pending++; done = sk_wait_event(sk, timeo_p, + !sk->sk_err && !((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); finish_wait(sk->sk_sleep, &wait); @@ -137,7 +139,9 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; - sk_wait_event(sk, ¤t_timeo, sk_stream_memory_free(sk) && + sk_wait_event(sk, ¤t_timeo, !sk->sk_err && + !(sk->sk_shutdown & SEND_SHUTDOWN) && + sk_stream_memory_free(sk) && vm_wait); sk->sk_write_pending--; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 6f92f9c62990..f121f7de2032 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1438,8 +1438,9 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, /* * POSIX 1003.1g mandates this order. */ - if (sk->sk_err) - ret = sock_error(sk); + ret = sock_error(sk); + if (ret) + break; else if (sk->sk_shutdown & RCV_SHUTDOWN) ; else if (noblock) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index c3f0b0783453..b6d3df5c911c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -566,10 +566,9 @@ static int llc_wait_data(struct sock *sk, long timeo) /* * POSIX 1003.1g mandates this order. */ - if (sk->sk_err) { - rc = sock_error(sk); + rc = sock_error(sk); + if (rc) break; - } rc = 0; if (sk->sk_shutdown & RCV_SHUTDOWN) break; -- cgit v1.2.3-70-g09d2 From b9750ce13c08aa8a71a9b138d741f3046aefd991 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:22:54 -0800 Subject: [IPV6]: Generalise some functions Using sk->sk_protocol instead of IPPROTO_TCP. Will be used by DCCPv6 in the next changesets. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 + include/net/inet6_connection_sock.h | 13 ++- net/ipv6/af_inet6.c | 52 ++++++++++++ net/ipv6/inet6_connection_sock.c | 103 ++++++++++++++++++++++++ net/ipv6/tcp_ipv6.c | 153 +----------------------------------- 5 files changed, 173 insertions(+), 150 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7d3e86d9576e..69a0decfbdf4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -297,6 +297,8 @@ struct tcp6_sock { struct ipv6_pinfo inet6; }; +extern int inet6_sk_rebuild_header(struct sock *sk); + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) { diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index aa30ebde70dc..b33b438bffcc 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -15,8 +15,15 @@ #include -struct sock; +struct in6_addr; +struct inet_bind_bucket; struct request_sock; +struct sk_buff; +struct sock; +struct sockaddr; + +extern int inet6_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb); extern struct request_sock *inet6_csk_search_req(const struct sock *sk, struct request_sock ***prevp, @@ -28,4 +35,8 @@ extern struct request_sock *inet6_csk_search_req(const struct sock *sk, extern void inet6_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, const unsigned long timeout); + +extern void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); + +extern int inet6_csk_xmit(struct sk_buff *skb, int ipfragok); #endif /* _INET6_CONNECTION_SOCK_H */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d9546380fa04..fd040e9a1f47 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -609,6 +609,58 @@ inet6_unregister_protosw(struct inet_protosw *p) } } +int inet6_sk_rebuild_header(struct sock *sk) +{ + int err; + struct dst_entry *dst; + struct ipv6_pinfo *np = inet6_sk(sk); + + dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst == NULL) { + struct inet_sock *inet = inet_sk(sk); + struct in6_addr *final_p = NULL, final; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + fl.proto = sk->sk_protocol; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.fl6_flowlabel = np->flow_label; + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_dport = inet->dport; + fl.fl_ip_sport = inet->sport; + + if (np->opt && np->opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + + err = ip6_dst_lookup(sk, &dst, &fl); + if (err) { + sk->sk_route_caps = 0; + return err; + } + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + sk->sk_err_soft = -err; + return err; + } + + ip6_dst_store(sk, dst, NULL); + sk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + } + + return 0; +} + +EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header); + int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) { diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index fe874eeaa40c..792f90f0f9ec 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -21,8 +21,34 @@ #include #include +#include +#include +#include #include +int inet6_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb) +{ + const struct sock *sk2; + const struct hlist_node *node; + + /* We must walk the whole port owner list in this case. -DaveM */ + sk_for_each_bound(sk2, node, &tb->owners) { + if (sk != sk2 && + (!sk->sk_bound_dev_if || + !sk2->sk_bound_dev_if || + sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && + (!sk->sk_reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + ipv6_rcv_saddr_equal(sk, sk2)) + break; + } + + return node != NULL; +} + +EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); + /* * request_sock (formerly open request) hash tables. */ @@ -94,3 +120,80 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk, } EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); + +void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; + + sin6->sin6_family = AF_INET6; + ipv6_addr_copy(&sin6->sin6_addr, &np->daddr); + sin6->sin6_port = inet_sk(sk)->dport; + /* We do not store received flowlabel for TCP */ + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = 0; + if (sk->sk_bound_dev_if && + ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + sin6->sin6_scope_id = sk->sk_bound_dev_if; +} + +EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); + +int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) +{ + struct sock *sk = skb->sk; + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi fl; + struct dst_entry *dst; + struct in6_addr *final_p = NULL, final; + + memset(&fl, 0, sizeof(fl)); + fl.proto = sk->sk_protocol; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.fl6_flowlabel = np->flow_label; + IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_sport = inet->sport; + fl.fl_ip_dport = inet->dport; + + if (np->opt && np->opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + + dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst == NULL) { + int err = ip6_dst_lookup(sk, &dst, &fl); + + if (err) { + sk->sk_err_soft = -err; + return err; + } + + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + sk->sk_route_caps = 0; + return err; + } + + ip6_dst_store(sk, dst, NULL); + sk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + } + + skb->dst = dst_clone(dst); + + /* Restore final destination back after routing done */ + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + + return ip6_xmit(sk, skb, &fl, np->opt, 0); +} + +EXPORT_SYMBOL_GPL(inet6_csk_xmit); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8ce8a1359d2b..2f932ce72610 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -72,32 +72,10 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); -static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok); static struct inet_connection_sock_af_ops ipv6_mapped; static struct inet_connection_sock_af_ops ipv6_specific; -int inet6_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb) -{ - const struct sock *sk2; - const struct hlist_node *node; - - /* We must walk the whole port owner list in this case. -DaveM */ - sk_for_each_bound(sk2, node, &tb->owners) { - if (sk != sk2 && - (!sk->sk_bound_dev_if || - !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && - (!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; - } - - return node != NULL; -} - static int tcp_v6_get_port(struct sock *sk, unsigned short snum) { return inet_csk_get_port(&tcp_hashinfo, sk, snum, @@ -1500,129 +1478,6 @@ do_time_wait: goto discard_it; } -static int tcp_v6_rebuild_header(struct sock *sk) -{ - int err; - struct dst_entry *dst; - struct ipv6_pinfo *np = inet6_sk(sk); - - dst = __sk_dst_check(sk, np->dst_cookie); - - if (dst == NULL) { - struct inet_sock *inet = inet_sk(sk); - struct in6_addr *final_p = NULL, final; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_flowlabel = np->flow_label; - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet->dport; - fl.fl_ip_sport = inet->sport; - - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) { - sk->sk_route_caps = 0; - return err; - } - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - sk->sk_err_soft = -err; - return err; - } - - ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); - } - - return 0; -} - -static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok) -{ - struct sock *sk = skb->sk; - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi fl; - struct dst_entry *dst; - struct in6_addr *final_p = NULL, final; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_flowlabel = np->flow_label; - IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_sport = inet->sport; - fl.fl_ip_dport = inet->dport; - - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - - dst = __sk_dst_check(sk, np->dst_cookie); - - if (dst == NULL) { - int err = ip6_dst_lookup(sk, &dst, &fl); - - if (err) { - sk->sk_err_soft = -err; - return err; - } - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - sk->sk_route_caps = 0; - return err; - } - - ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); - } - - skb->dst = dst_clone(dst); - - /* Restore final destination back after routing done */ - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - - return ip6_xmit(sk, skb, &fl, np->opt, 0); -} - -static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; - - sin6->sin6_family = AF_INET6; - ipv6_addr_copy(&sin6->sin6_addr, &np->daddr); - sin6->sin6_port = inet_sk(sk)->dport; - /* We do not store received flowlabel for TCP */ - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - if (sk->sk_bound_dev_if && - ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = sk->sk_bound_dev_if; -} - static int tcp_v6_remember_stamp(struct sock *sk) { /* Alas, not yet... */ @@ -1630,9 +1485,9 @@ static int tcp_v6_remember_stamp(struct sock *sk) } static struct inet_connection_sock_af_ops ipv6_specific = { - .queue_xmit = tcp_v6_xmit, + .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, - .rebuild_header = tcp_v6_rebuild_header, + .rebuild_header = inet6_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .remember_stamp = tcp_v6_remember_stamp, @@ -1640,7 +1495,7 @@ static struct inet_connection_sock_af_ops ipv6_specific = { .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = v6_addr2sockaddr, + .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6) }; @@ -1659,7 +1514,7 @@ static struct inet_connection_sock_af_ops ipv6_mapped = { .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = v6_addr2sockaddr, + .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6) }; -- cgit v1.2.3-70-g09d2 From 0fa1a53e1f055a6c790f40e7728f42a825b29248 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:23:09 -0800 Subject: [IPV6]: Introduce inet6_timewait_sock Out of tcp6_timewait_sock, that now is just an aggregation of inet_timewait_sock and inet6_timewait_sock, using tw_ipv6_offset in struct inet_timewait_sock, that is common to the IPv6 transport protocols that use timewait sockets, like DCCP and TCP. tw_ipv6_offset plays the struct inet_sock pinfo6 role, i.e. for the generic code to find the IPv6 area in a timewait sock. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 32 +++++++++++++++++++++----------- include/net/inet6_hashtables.h | 6 +++--- include/net/inet_timewait_sock.h | 3 ++- net/ipv4/inet_diag.c | 6 +++--- net/ipv4/tcp_minisocks.c | 8 +++++--- net/ipv6/addrconf.c | 2 +- net/ipv6/tcp_ipv6.c | 12 ++++++------ 7 files changed, 41 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 69a0decfbdf4..7d3908594fac 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -348,26 +348,36 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #include +struct inet6_timewait_sock { + struct in6_addr tw_v6_daddr; + struct in6_addr tw_v6_rcv_saddr; +}; + struct tcp6_timewait_sock { - struct tcp_timewait_sock tw_v6_sk; - struct in6_addr tw_v6_daddr; - struct in6_addr tw_v6_rcv_saddr; + struct tcp_timewait_sock tcp6tw_tcp; + struct inet6_timewait_sock tcp6tw_inet6; }; -static inline struct tcp6_timewait_sock *tcp6_twsk(const struct sock *sk) +static inline u16 inet6_tw_offset(const struct proto *prot) +{ + return prot->twsk_obj_size - sizeof(struct inet6_timewait_sock); +} + +static inline struct inet6_timewait_sock *inet6_twsk(const struct sock *sk) { - return (struct tcp6_timewait_sock *)sk; + return (struct inet6_timewait_sock *)(((u8 *)sk) + + inet_twsk(sk)->tw_ipv6_offset); } -static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) +static inline struct in6_addr *__inet6_rcv_saddr(const struct sock *sk) { return likely(sk->sk_state != TCP_TIME_WAIT) ? - &inet6_sk(sk)->rcv_saddr : &tcp6_twsk(sk)->tw_v6_rcv_saddr; + &inet6_sk(sk)->rcv_saddr : &inet6_twsk(sk)->tw_v6_rcv_saddr; } -static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) +static inline struct in6_addr *inet6_rcv_saddr(const struct sock *sk) { - return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; + return sk->sk_family == AF_INET6 ? __inet6_rcv_saddr(sk) : NULL; } static inline int inet_v6_ipv6only(const struct sock *sk) @@ -395,8 +405,8 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) return NULL; } -#define __tcp_v6_rcv_saddr(__sk) NULL -#define tcp_v6_rcv_saddr(__sk) NULL +#define __inet6_rcv_saddr(__sk) NULL +#define inet6_rcv_saddr(__sk) NULL #define tcp_twsk_ipv6only(__sk) 0 #define inet_v6_ipv6only(__sk) 0 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index a4a204f99ea6..25f708ff020e 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -110,10 +110,10 @@ static inline struct sock * if(*((__u32 *)&(tw->tw_dport)) == ports && sk->sk_family == PF_INET6) { - const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); - if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && + if (ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) goto hit; } diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 28f7b2103505..ca240f856c46 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -127,7 +127,8 @@ struct inet_timewait_sock { __u16 tw_num; /* And these are ours. */ __u8 tw_ipv6only:1; - /* 31 bits hole, try to pack */ + /* 15 bits hole, try to pack */ + __u16 tw_ipv6_offset; int tw_timeout; unsigned long tw_ttd; struct inet_bind_bucket *tw_tb; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 3ce73b141d7e..c49908192047 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -112,12 +112,12 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, r->idiag_inode = 0; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->idiag_family == AF_INET6) { - const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &tcp6tw->tw_v6_rcv_saddr); + &tw6->tw_v6_rcv_saddr); ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &tcp6tw->tw_v6_daddr); + &tw6->tw_v6_daddr); } #endif nlh->nlmsg_len = skb->tail - b; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 9c029683a626..2b9b7f6c7f7c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -298,10 +298,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); + struct inet6_timewait_sock *tw6; - ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr); - ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr); + tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); + tw6 = inet6_twsk((struct sock *)tw); + ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr); + ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr); tw->tw_ipv6only = np->ipv6only; } #endif diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a60585fd85ad..704fb73e6c5f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1195,7 +1195,7 @@ struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device * int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; - const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); + const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2f932ce72610..cb880079daf3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -138,14 +138,14 @@ static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2); + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); tw = inet_twsk(sk2); if(*((__u32 *)&(tw->tw_dport)) == ports && sk2->sk_family == PF_INET6 && - ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && + ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); struct tcp_sock *tp = tcp_sk(sk); @@ -1663,14 +1663,14 @@ static void get_timewait6_sock(struct seq_file *seq, { struct in6_addr *dest, *src; __u16 destp, srcp; - struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); + struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); int ttd = tw->tw_ttd - jiffies; if (ttd < 0) ttd = 0; - dest = &tcp6tw->tw_v6_daddr; - src = &tcp6tw->tw_v6_rcv_saddr; + dest = &tw6->tw_v6_daddr; + src = &tw6->tw_v6_rcv_saddr; destp = ntohs(tw->tw_dport); srcp = ntohs(tw->tw_sport); -- cgit v1.2.3-70-g09d2 From 399c07def62a77678d633f5b3005431423a424a8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:24:28 -0800 Subject: [IPV6]: Export ipv6_opt_accepted It was already non-TCP specific, will be used by DCCPv6. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 ++ net/ipv6/af_inet6.c | 21 +++++++++++++++++++++ net/ipv6/tcp_ipv6.c | 16 ---------------- 3 files changed, 23 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 0a2ad51cff82..851376108ac2 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -240,6 +240,8 @@ extern struct ipv6_txoptions * ipv6_renew_options(struct sock *sk, struct ipv6_t struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt); +extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb); + extern int ip6_frag_nqueues; extern atomic_t ip6_frag_mem; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 23675ef1f428..bf17aab9b776 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -663,6 +663,27 @@ int inet6_sk_rebuild_header(struct sock *sk) EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header); +int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet6_skb_parm *opt = IP6CB(skb); + + if (np->rxopt.all) { + if ((opt->hop && (np->rxopt.bits.hopopts || + np->rxopt.bits.ohopopts)) || + ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && + np->rxopt.bits.rxflow) || + (opt->srcrt && (np->rxopt.bits.srcrt || + np->rxopt.bits.osrcrt)) || + ((opt->dst1 || opt->dst0) && + (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) + return 1; + } + return 0; +} + +EXPORT_SYMBOL_GPL(ipv6_opt_accepted); + int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cb880079daf3..e5c8a669e84e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -711,22 +711,6 @@ static struct request_sock_ops tcp6_request_sock_ops = { .send_reset = tcp_v6_send_reset }; -static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct inet6_skb_parm *opt = IP6CB(skb); - - if (np->rxopt.all) { - if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || - ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) || - (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || - ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) - return 1; - } - return 0; -} - - static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); -- cgit v1.2.3-70-g09d2 From 6d6ee43e0b8b8d4847627fd43739b98ec2b9404f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:25:19 -0800 Subject: [TWSK]: Introduce struct timewait_sock_ops So that we can share several timewait sockets related functions and make the timewait mini sockets infrastructure closer to the request mini sockets one. Next changesets will take advantage of this, moving more code out of TCP and DCCP v4 and v6 to common infrastructure. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 +- include/net/inet_timewait_sock.h | 3 +- include/net/sock.h | 4 +-- include/net/tcp.h | 3 ++ include/net/timewait_sock.h | 31 ++++++++++++++++++ net/core/sock.c | 21 ++++++------ net/dccp/ipv4.c | 9 ++++- net/dccp/ipv6.c | 6 +++- net/ipv4/inet_timewait_sock.c | 5 +-- net/ipv4/tcp_ipv4.c | 71 ++++++++++++++++++++++++---------------- net/ipv6/tcp_ipv6.c | 25 +++++--------- 11 files changed, 118 insertions(+), 63 deletions(-) create mode 100644 include/net/timewait_sock.h (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7d3908594fac..a0d04891fe12 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -360,7 +360,8 @@ struct tcp6_timewait_sock { static inline u16 inet6_tw_offset(const struct proto *prot) { - return prot->twsk_obj_size - sizeof(struct inet6_timewait_sock); + return prot->twsk_prot->twsk_obj_size - + sizeof(struct inet6_timewait_sock); } static inline struct inet6_timewait_sock *inet6_twsk(const struct sock *sk) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index ca240f856c46..e396a65473d7 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -26,6 +26,7 @@ #include #include +#include #include @@ -200,7 +201,7 @@ static inline void inet_twsk_put(struct inet_timewait_sock *tw) printk(KERN_DEBUG "%s timewait_sock %p released\n", tw->tw_prot->name, tw); #endif - kmem_cache_free(tw->tw_prot->twsk_slab, tw); + kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); module_put(owner); } } diff --git a/include/net/sock.h b/include/net/sock.h index 0fbae85c6d55..91d28957dc10 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -493,6 +493,7 @@ extern void sk_stream_kill_queues(struct sock *sk); extern int sk_wait_data(struct sock *sk, long *timeo); struct request_sock_ops; +struct timewait_sock_ops; /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface @@ -557,11 +558,10 @@ struct proto { kmem_cache_t *slab; unsigned int obj_size; - kmem_cache_t *twsk_slab; - unsigned int twsk_obj_size; atomic_t *orphan_count; struct request_sock_ops *rsk_prot; + struct timewait_sock_ops *twsk_prot; struct module *owner; diff --git a/include/net/tcp.h b/include/net/tcp.h index 83b117a25c2a..176221cd0cce 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -287,6 +287,9 @@ extern int tcp_rcv_established(struct sock *sk, extern void tcp_rcv_space_adjust(struct sock *sk); +extern int tcp_twsk_unique(struct sock *sk, + struct sock *sktw, void *twp); + static inline void tcp_dec_quickack_mode(struct sock *sk, const unsigned int pkts) { diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h new file mode 100644 index 000000000000..2544281e1d5e --- /dev/null +++ b/include/net/timewait_sock.h @@ -0,0 +1,31 @@ +/* + * NET Generic infrastructure for Network protocols. + * + * Authors: Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _TIMEWAIT_SOCK_H +#define _TIMEWAIT_SOCK_H + +#include +#include + +struct timewait_sock_ops { + kmem_cache_t *twsk_slab; + unsigned int twsk_obj_size; + int (*twsk_unique)(struct sock *sk, + struct sock *sktw, void *twp); +}; + +static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) +{ + if (sk->sk_prot->twsk_prot->twsk_unique != NULL) + return sk->sk_prot->twsk_prot->twsk_unique(sk, sktw, twp); + return 0; +} + +#endif /* _TIMEWAIT_SOCK_H */ diff --git a/net/core/sock.c b/net/core/sock.c index 13cc3be4f056..6465b0e4c8cb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1488,7 +1488,7 @@ int proto_register(struct proto *prot, int alloc_slab) } } - if (prot->twsk_obj_size) { + if (prot->twsk_prot != NULL) { static const char mask[] = "tw_sock_%s"; timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); @@ -1497,11 +1497,12 @@ int proto_register(struct proto *prot, int alloc_slab) goto out_free_request_sock_slab; sprintf(timewait_sock_slab_name, mask, prot->name); - prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name, - prot->twsk_obj_size, - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (prot->twsk_slab == NULL) + prot->twsk_prot->twsk_slab = + kmem_cache_create(timewait_sock_slab_name, + prot->twsk_prot->twsk_obj_size, + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; } } @@ -1548,12 +1549,12 @@ void proto_unregister(struct proto *prot) prot->rsk_prot->slab = NULL; } - if (prot->twsk_slab != NULL) { - const char *name = kmem_cache_name(prot->twsk_slab); + if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { + const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab); - kmem_cache_destroy(prot->twsk_slab); + kmem_cache_destroy(prot->twsk_prot->twsk_slab); kfree(name); - prot->twsk_slab = NULL; + prot->twsk_prot->twsk_slab = NULL; } } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index bc28d71905e2..e11cda0cb6b3 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -1309,6 +1310,10 @@ static struct request_sock_ops dccp_request_sock_ops = { .send_reset = dccp_v4_ctl_send_reset, }; +static struct timewait_sock_ops dccp_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct inet_timewait_sock), +}; + struct proto dccp_prot = { .name = "DCCP", .owner = THIS_MODULE, @@ -1332,5 +1337,7 @@ struct proto dccp_prot = { .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), .rsk_prot = &dccp_request_sock_ops, - .twsk_obj_size = sizeof(struct inet_timewait_sock), + .twsk_prot = &dccp_timewait_sock_ops, }; + +EXPORT_SYMBOL_GPL(dccp_prot); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index a7d2aee5b3af..4d078f5b911b 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -652,6 +652,10 @@ static struct request_sock_ops dccp6_request_sock_ops = { .send_reset = dccp_v6_ctl_send_reset, }; +static struct timewait_sock_ops dccp6_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct dccp6_timewait_sock), +}; + static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -1359,7 +1363,7 @@ static struct proto dccp_v6_prot = { .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), .rsk_prot = &dccp6_request_sock_ops, - .twsk_obj_size = sizeof(struct dccp6_timewait_sock), + .twsk_prot = &dccp6_timewait_sock_ops, }; static struct inet6_protocol dccp_v6_protocol = { diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index a010e9a68811..417f126c749e 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -90,8 +90,9 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) { - struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, - SLAB_ATOMIC); + struct inet_timewait_sock *tw = + kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, + SLAB_ATOMIC); if (tw != NULL) { const struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0b5ab04d3c5a..6728772a943a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -69,6 +69,7 @@ #include #include #include +#include #include #include @@ -118,6 +119,39 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) skb->h.th->source); } +int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) +{ + const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); + struct tcp_sock *tp = tcp_sk(sk); + + /* With PAWS, it is safe from the viewpoint + of data integrity. Even without PAWS it is safe provided sequence + spaces do not overlap i.e. at data rates <= 80Mbit/sec. + + Actually, the idea is close to VJ's one, only timestamp cache is + held not per host, but per port pair and TW bucket is used as state + holder. + + If TW bucket has been already destroyed we fall back to VJ's scheme + and use initial timestamp retrieved from peer table. + */ + if (tcptw->tw_ts_recent_stamp && + (twp == NULL || (sysctl_tcp_tw_reuse && + xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { + tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; + if (tp->write_seq == 0) + tp->write_seq = 1; + tp->rx_opt.ts_recent = tcptw->tw_ts_recent; + tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; + sock_hold(sktw); + return 1; + } + + return 0; +} + +EXPORT_SYMBOL_GPL(tcp_twsk_unique); + /* called with local bh disabled */ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, struct inet_timewait_sock **twp) @@ -142,35 +176,9 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, tw = inet_twsk(sk2); if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { - const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); - struct tcp_sock *tp = tcp_sk(sk); - - /* With PAWS, it is safe from the viewpoint - of data integrity. Even without PAWS it - is safe provided sequence spaces do not - overlap i.e. at data rates <= 80Mbit/sec. - - Actually, the idea is close to VJ's one, - only timestamp cache is held not per host, - but per port pair and TW bucket is used - as state holder. - - If TW bucket has been already destroyed we - fall back to VJ's scheme and use initial - timestamp retrieved from peer table. - */ - if (tcptw->tw_ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && - xtime.tv_sec - - tcptw->tw_ts_recent_stamp > 1))) { - tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; - if (tp->write_seq == 0) - tp->write_seq = 1; - tp->rx_opt.ts_recent = tcptw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; - sock_hold(sk2); + if (twsk_unique(sk, sk2, twp)) goto unique; - } else + else goto not_unique; } } @@ -869,6 +877,11 @@ struct request_sock_ops tcp_request_sock_ops = { .send_reset = tcp_v4_send_reset, }; +static struct timewait_sock_ops tcp_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp_timewait_sock), + .twsk_unique = tcp_twsk_unique, +}; + int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct inet_request_sock *ireq; @@ -1979,7 +1992,7 @@ struct proto tcp_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), - .twsk_obj_size = sizeof(struct tcp_timewait_sock), + .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e5c8a669e84e..514b57bb80b7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -60,6 +60,7 @@ #include #include #include +#include #include @@ -147,22 +148,9 @@ static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { - const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); - struct tcp_sock *tp = tcp_sk(sk); - - if (tcptw->tw_ts_recent_stamp && - (!twp || - (sysctl_tcp_tw_reuse && - xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { - /* See comment in tcp_ipv4.c */ - tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; - if (!tp->write_seq) - tp->write_seq = 1; - tp->rx_opt.ts_recent = tcptw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; - sock_hold(sk2); + if (twsk_unique(sk, sk2, twp)) goto unique; - } else + else goto not_unique; } } @@ -711,6 +699,11 @@ static struct request_sock_ops tcp6_request_sock_ops = { .send_reset = tcp_v6_send_reset }; +static struct timewait_sock_ops tcp6_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp6_timewait_sock), + .twsk_unique = tcp_twsk_unique, +}; + static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -1752,7 +1745,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), - .twsk_obj_size = sizeof(struct tcp6_timewait_sock), + .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, }; -- cgit v1.2.3-70-g09d2 From a7f5e7f164788a22eb5d3de8e2d3cee1bf58fdca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:25:31 -0800 Subject: [INET]: Generalise tcp_v4_hash_connect Renaming it to inet_hash_connect, making it possible to ditch dccp_v4_hash_connect and share the same code with TCP instead. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- drivers/char/random.c | 6 +- include/linux/random.h | 2 +- include/net/inet_hashtables.h | 3 + net/dccp/ipv4.c | 160 +------------------------------------ net/ipv4/inet_hashtables.c | 178 ++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 173 +--------------------------------------- 6 files changed, 186 insertions(+), 336 deletions(-) (limited to 'include') diff --git a/drivers/char/random.c b/drivers/char/random.c index 7999da25fe40..79b59d986af4 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1554,10 +1554,8 @@ __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr, EXPORT_SYMBOL(secure_tcp_sequence_number); - - -/* Generate secure starting point for ephemeral TCP port search */ -u32 secure_tcp_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport) +/* Generate secure starting point for ephemeral IPV4 transport port search */ +u32 secure_ipv4_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport) { struct keydata *keyptr = get_keyptr(); u32 hash[4]; diff --git a/include/linux/random.h b/include/linux/random.h index 7b2adb3322d5..01424a8e621c 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -52,7 +52,7 @@ extern void get_random_bytes(void *buf, int nbytes); void generate_random_uuid(unsigned char uuid_out[16]); extern __u32 secure_ip_id(__u32 daddr); -extern u32 secure_tcp_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport); +extern u32 secure_ipv4_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport); extern u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dport); extern __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 07840baa9341..c83baa79f66e 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -434,4 +434,7 @@ static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, return sk; } + +extern int inet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk); #endif /* _INET_HASHTABLES_H */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e11cda0cb6b3..671fbf3b2379 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -54,164 +54,6 @@ void dccp_unhash(struct sock *sk) EXPORT_SYMBOL_GPL(dccp_unhash); -/* called with local bh disabled */ -static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, - struct inet_timewait_sock **twp) -{ - struct inet_sock *inet = inet_sk(sk); - const u32 daddr = inet->rcv_saddr; - const u32 saddr = inet->daddr; - const int dif = sk->sk_bound_dev_if; - INET_ADDR_COOKIE(acookie, saddr, daddr) - const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); - struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash); - const struct sock *sk2; - const struct hlist_node *node; - struct inet_timewait_sock *tw; - - prefetch(head->chain.first); - write_lock(&head->lock); - - /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { - tw = inet_twsk(sk2); - - if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) - goto not_unique; - } - tw = NULL; - - /* And established part... */ - sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) - goto not_unique; - } - - /* Must record num and sport now. Otherwise we will see - * in hash table socket with a funny identity. */ - inet->num = lport; - inet->sport = htons(lport); - sk->sk_hash = hash; - BUG_TRAP(sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); - sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); - - if (twp != NULL) { - *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - } else if (tw != NULL) { - /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, &dccp_death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - - inet_twsk_put(tw); - } - - return 0; - -not_unique: - write_unlock(&head->lock); - return -EADDRNOTAVAIL; -} - -/* - * Bind a port for a connect operation and hash it. - */ -static int dccp_v4_hash_connect(struct sock *sk) -{ - const unsigned short snum = inet_sk(sk)->num; - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret; - - if (snum == 0) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover = net_random() % (high - low) + low; - struct hlist_node *node; - struct inet_timewait_sock *tw = NULL; - - local_bh_disable(); - do { - head = &dccp_hashinfo.bhash[inet_bhashfn(rover, - dccp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - - /* Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. - */ - inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == rover) { - BUG_TRAP(!hlist_empty(&tb->owners)); - if (tb->fastreuse >= 0) - goto next_port; - if (!__dccp_v4_check_established(sk, - rover, - &tw)) - goto ok; - goto next_port; - } - } - - tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, - head, rover); - if (tb == NULL) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); - if (++rover > high) - rover = low; - } while (--remaining > 0); - - local_bh_enable(); - - return -EADDRNOTAVAIL; - -ok: - /* All locks still held and bhs disabled */ - inet_bind_hash(sk, tb, rover); - if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(rover); - __inet_hash(&dccp_hashinfo, sk, 0); - } - spin_unlock(&head->lock); - - if (tw != NULL) { - inet_twsk_deschedule(tw, &dccp_death_row); - inet_twsk_put(tw); - } - - ret = 0; - goto out; - } - - head = &dccp_hashinfo.bhash[inet_bhashfn(snum, - dccp_hashinfo.bhash_size)]; - tb = inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { - __inet_hash(&dccp_hashinfo, sk, 0); - spin_unlock_bh(&head->lock); - return 0; - } else { - spin_unlock(&head->lock); - /* No definite answer... Walk to established hash table */ - ret = __dccp_v4_check_established(sk, snum, NULL); -out: - local_bh_enable(); - return ret; - } -} - int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); @@ -272,7 +114,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * complete initialization after this. */ dccp_set_state(sk, DCCP_REQUESTING); - err = dccp_v4_hash_connect(sk); + err = inet_hash_connect(&dccp_death_row, sk); if (err != 0) goto failure; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e8d29fe736d2..33228115cda4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -15,12 +15,14 @@ #include #include +#include #include #include #include #include #include +#include /* * Allocate and initialize a new local port bind bucket. @@ -163,3 +165,179 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad } EXPORT_SYMBOL_GPL(__inet_lookup_listener); + +/* called with local bh disabled */ +static int __inet_check_established(struct inet_timewait_death_row *death_row, + struct sock *sk, __u16 lport, + struct inet_timewait_sock **twp) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + struct inet_sock *inet = inet_sk(sk); + u32 daddr = inet->rcv_saddr; + u32 saddr = inet->daddr; + int dif = sk->sk_bound_dev_if; + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + struct sock *sk2; + const struct hlist_node *node; + struct inet_timewait_sock *tw; + + prefetch(head->chain.first); + write_lock(&head->lock); + + /* Check TIME-WAIT sockets first. */ + sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { + tw = inet_twsk(sk2); + + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { + if (twsk_unique(sk, sk2, twp)) + goto unique; + else + goto not_unique; + } + } + tw = NULL; + + /* And established part... */ + sk_for_each(sk2, node, &head->chain) { + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) + goto not_unique; + } + +unique: + /* Must record num and sport now. Otherwise we will see + * in hash table socket with a funny identity. */ + inet->num = lport; + inet->sport = htons(lport); + sk->sk_hash = hash; + BUG_TRAP(sk_unhashed(sk)); + __sk_add_node(sk, &head->chain); + sock_prot_inc_use(sk->sk_prot); + write_unlock(&head->lock); + + if (twp) { + *twp = tw; + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + } else if (tw) { + /* Silly. Should hash-dance instead... */ + inet_twsk_deschedule(tw, death_row); + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + + inet_twsk_put(tw); + } + + return 0; + +not_unique: + write_unlock(&head->lock); + return -EADDRNOTAVAIL; +} + +static inline u32 inet_sk_port_offset(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr, + inet->dport); +} + +/* + * Bind a port for a connect operation and hash it. + */ +int inet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + const unsigned short snum = inet_sk(sk)->num; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; + int ret; + + if (!snum) { + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; + int range = high - low; + int i; + int port; + static u32 hint; + u32 offset = hint + inet_sk_port_offset(sk); + struct hlist_node *node; + struct inet_timewait_sock *tw = NULL; + + local_bh_disable(); + for (i = 1; i <= range; i++) { + port = low + (i + offset) % range; + head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, + * because the established check is already + * unique enough. + */ + inet_bind_bucket_for_each(tb, node, &head->chain) { + if (tb->port == port) { + BUG_TRAP(!hlist_empty(&tb->owners)); + if (tb->fastreuse >= 0) + goto next_port; + if (!__inet_check_established(death_row, + sk, port, + &tw)) + goto ok; + goto next_port; + } + } + + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port); + if (!tb) { + spin_unlock(&head->lock); + break; + } + tb->fastreuse = -1; + goto ok; + + next_port: + spin_unlock(&head->lock); + } + local_bh_enable(); + + return -EADDRNOTAVAIL; + +ok: + hint += i; + + /* Head lock still held and bh's disabled */ + inet_bind_hash(sk, tb, port); + if (sk_unhashed(sk)) { + inet_sk(sk)->sport = htons(port); + __inet_hash(hinfo, sk, 0); + } + spin_unlock(&head->lock); + + if (tw) { + inet_twsk_deschedule(tw, death_row);; + inet_twsk_put(tw); + } + + ret = 0; + goto out; + } + + head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { + __inet_hash(hinfo, sk, 0); + spin_unlock_bh(&head->lock); + return 0; + } else { + spin_unlock(&head->lock); + /* No definite answer... Walk to established hash table */ + ret = __inet_check_established(death_row, sk, snum, NULL); +out: + local_bh_enable(); + return ret; + } +} + +EXPORT_SYMBOL_GPL(inet_hash_connect); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6728772a943a..c2fe61becd61 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -152,177 +152,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) EXPORT_SYMBOL_GPL(tcp_twsk_unique); -/* called with local bh disabled */ -static int __tcp_v4_check_established(struct sock *sk, __u16 lport, - struct inet_timewait_sock **twp) -{ - struct inet_sock *inet = inet_sk(sk); - u32 daddr = inet->rcv_saddr; - u32 saddr = inet->daddr; - int dif = sk->sk_bound_dev_if; - INET_ADDR_COOKIE(acookie, saddr, daddr) - const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); - struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); - struct sock *sk2; - const struct hlist_node *node; - struct inet_timewait_sock *tw; - - prefetch(head->chain.first); - write_lock(&head->lock); - - /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - tw = inet_twsk(sk2); - - if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { - if (twsk_unique(sk, sk2, twp)) - goto unique; - else - goto not_unique; - } - } - tw = NULL; - - /* And established part... */ - sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) - goto not_unique; - } - -unique: - /* Must record num and sport now. Otherwise we will see - * in hash table socket with a funny identity. */ - inet->num = lport; - inet->sport = htons(lport); - sk->sk_hash = hash; - BUG_TRAP(sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); - sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); - - if (twp) { - *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - } else if (tw) { - /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, &tcp_death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - - inet_twsk_put(tw); - } - - return 0; - -not_unique: - write_unlock(&head->lock); - return -EADDRNOTAVAIL; -} - -static inline u32 connect_port_offset(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - - return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr, - inet->dport); -} - -/* - * Bind a port for a connect operation and hash it. - */ -static inline int tcp_v4_hash_connect(struct sock *sk) -{ - const unsigned short snum = inet_sk(sk)->num; - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret; - - if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int range = high - low; - int i; - int port; - static u32 hint; - u32 offset = hint + connect_port_offset(sk); - struct hlist_node *node; - struct inet_timewait_sock *tw = NULL; - - local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; - head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - - /* Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. - */ - inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { - BUG_TRAP(!hlist_empty(&tb->owners)); - if (tb->fastreuse >= 0) - goto next_port; - if (!__tcp_v4_check_established(sk, - port, - &tw)) - goto ok; - goto next_port; - } - } - - tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); - if (!tb) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); - } - local_bh_enable(); - - return -EADDRNOTAVAIL; - -ok: - hint += i; - - /* Head lock still held and bh's disabled */ - inet_bind_hash(sk, tb, port); - if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(port); - __inet_hash(&tcp_hashinfo, sk, 0); - } - spin_unlock(&head->lock); - - if (tw) { - inet_twsk_deschedule(tw, &tcp_death_row);; - inet_twsk_put(tw); - } - - ret = 0; - goto out; - } - - head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - tb = inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __inet_hash(&tcp_hashinfo, sk, 0); - spin_unlock_bh(&head->lock); - return 0; - } else { - spin_unlock(&head->lock); - /* No definite answer... Walk to established hash table */ - ret = __tcp_v4_check_established(sk, snum, NULL); -out: - local_bh_enable(); - return ret; - } -} - /* This will initiate an outgoing connection. */ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -403,7 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * complete initialization after this. */ tcp_set_state(sk, TCP_SYN_SENT); - err = tcp_v4_hash_connect(sk); + err = inet_hash_connect(&tcp_death_row, sk); if (err) goto failure; -- cgit v1.2.3-70-g09d2 From d8313f5ca2b1f86b7df6c99fc4b3fffa1f84e92b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:25:44 -0800 Subject: [INET6]: Generalise tcp_v6_hash_connect Renaming it to inet6_hash_connect, making it possible to ditch dccp_v6_hash_connect and share the same code with TCP instead. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- drivers/char/random.c | 4 +- include/linux/random.h | 4 +- include/net/ipv6.h | 3 + net/dccp/ipv6.c | 171 +---------------------------------------- net/ipv6/inet6_hashtables.c | 183 +++++++++++++++++++++++++++++++++++++++++++- net/ipv6/tcp_ipv6.c | 173 +---------------------------------------- 6 files changed, 190 insertions(+), 348 deletions(-) (limited to 'include') diff --git a/drivers/char/random.c b/drivers/char/random.c index 79b59d986af4..bdfdfd28594d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1573,7 +1573,7 @@ u32 secure_ipv4_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport) } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dport) +u32 secure_ipv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dport) { struct keydata *keyptr = get_keyptr(); u32 hash[12]; @@ -1584,7 +1584,7 @@ u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dp return twothirdsMD4Transform(daddr, hash); } -EXPORT_SYMBOL(secure_tcpv6_port_ephemeral); +EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) diff --git a/include/linux/random.h b/include/linux/random.h index 01424a8e621c..5d6456bcdeba 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -53,8 +53,8 @@ void generate_random_uuid(unsigned char uuid_out[16]); extern __u32 secure_ip_id(__u32 daddr); extern u32 secure_ipv4_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport); -extern u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, - __u16 dport); +extern u32 secure_ipv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, + __u16 dport); extern __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr, __u16 sport, __u16 dport); extern __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 851376108ac2..e3d5d7bc8837 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -527,6 +527,9 @@ extern int inet6_getname(struct socket *sock, struct sockaddr *uaddr, extern int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); +extern int inet6_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk); + /* * reassembly.c */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4d078f5b911b..71bf04eb21e1 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -84,175 +84,6 @@ static __u32 dccp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) dh->dccph_sport); } -static int __dccp_v6_check_established(struct sock *sk, const __u16 lport, - struct inet_timewait_sock **twp) -{ - struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - const struct in6_addr *daddr = &np->rcv_saddr; - const struct in6_addr *saddr = &np->daddr; - const int dif = sk->sk_bound_dev_if; - const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const unsigned int hash = inet6_ehashfn(daddr, inet->num, - saddr, inet->dport); - struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash); - struct sock *sk2; - const struct hlist_node *node; - struct inet_timewait_sock *tw; - - prefetch(head->chain.first); - write_lock(&head->lock); - - /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { - const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); - - tw = inet_twsk(sk2); - - if(*((__u32 *)&(tw->tw_dport)) == ports && - sk2->sk_family == PF_INET6 && - ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) - goto not_unique; - } - tw = NULL; - - /* And established part... */ - sk_for_each(sk2, node, &head->chain) { - if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) - goto not_unique; - } - - BUG_TRAP(sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); - sk->sk_hash = hash; - sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); - - if (twp) { - *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - } else if (tw) { - /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, &dccp_death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - - inet_twsk_put(tw); - } - return 0; - -not_unique: - write_unlock(&head->lock); - return -EADDRNOTAVAIL; -} - -static inline u32 dccp_v6_port_offset(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - - return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32, - np->daddr.s6_addr32, - inet->dport); -} - -static int dccp_v6_hash_connect(struct sock *sk) -{ - const unsigned short snum = inet_sk(sk)->num; - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret; - - if (snum == 0) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int range = high - low; - int i; - int port; - static u32 hint; - u32 offset = hint + dccp_v6_port_offset(sk); - struct hlist_node *node; - struct inet_timewait_sock *tw = NULL; - - local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; - head = &dccp_hashinfo.bhash[inet_bhashfn(port, - dccp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - - /* Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. - */ - inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { - BUG_TRAP(!hlist_empty(&tb->owners)); - if (tb->fastreuse >= 0) - goto next_port; - if (!__dccp_v6_check_established(sk, - port, - &tw)) - goto ok; - goto next_port; - } - } - - tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, - head, port); - if (!tb) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); - } - local_bh_enable(); - - return -EADDRNOTAVAIL; -ok: - hint += i; - - /* Head lock still held and bh's disabled */ - inet_bind_hash(sk, tb, port); - if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(port); - __inet6_hash(&dccp_hashinfo, sk); - } - spin_unlock(&head->lock); - - if (tw) { - inet_twsk_deschedule(tw, &dccp_death_row); - inet_twsk_put(tw); - } - - ret = 0; - goto out; - } - - head = &dccp_hashinfo.bhash[inet_bhashfn(snum, - dccp_hashinfo.bhash_size)]; - tb = inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - - if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __inet6_hash(&dccp_hashinfo, sk); - spin_unlock_bh(&head->lock); - return 0; - } else { - spin_unlock(&head->lock); - /* No definite answer... Walk to established hash table */ - ret = __dccp_v6_check_established(sk, snum, NULL); -out: - local_bh_enable(); - return ret; - } -} - static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -403,7 +234,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->dport = usin->sin6_port; dccp_set_state(sk, DCCP_REQUESTING); - err = dccp_v6_hash_connect(sk); + err = inet6_hash_connect(&dccp_death_row, sk); if (err) goto late_failure; /* FIXME */ diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 01d5f46d4e40..4154f3a8b6cf 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -5,7 +5,8 @@ * * Generic INET6 transport hashtables * - * Authors: Lotsa people, from code originally in tcp + * Authors: Lotsa people, from code originally in tcp, generalised here + * by Arnaldo Carvalho de Melo * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -14,12 +15,13 @@ */ #include - #include +#include #include #include #include +#include struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, @@ -79,3 +81,180 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, } EXPORT_SYMBOL_GPL(inet6_lookup); + +static int __inet6_check_established(struct inet_timewait_death_row *death_row, + struct sock *sk, const __u16 lport, + struct inet_timewait_sock **twp) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct in6_addr *daddr = &np->rcv_saddr; + const struct in6_addr *saddr = &np->daddr; + const int dif = sk->sk_bound_dev_if; + const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, + inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + struct sock *sk2; + const struct hlist_node *node; + struct inet_timewait_sock *tw; + + prefetch(head->chain.first); + write_lock(&head->lock); + + /* Check TIME-WAIT sockets first. */ + sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); + + tw = inet_twsk(sk2); + + if(*((__u32 *)&(tw->tw_dport)) == ports && + sk2->sk_family == PF_INET6 && + ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && + sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { + if (twsk_unique(sk, sk2, twp)) + goto unique; + else + goto not_unique; + } + } + tw = NULL; + + /* And established part... */ + sk_for_each(sk2, node, &head->chain) { + if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) + goto not_unique; + } + +unique: + BUG_TRAP(sk_unhashed(sk)); + __sk_add_node(sk, &head->chain); + sk->sk_hash = hash; + sock_prot_inc_use(sk->sk_prot); + write_unlock(&head->lock); + + if (twp != NULL) { + *twp = tw; + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + } else if (tw != NULL) { + /* Silly. Should hash-dance instead... */ + inet_twsk_deschedule(tw, death_row); + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + + inet_twsk_put(tw); + } + return 0; + +not_unique: + write_unlock(&head->lock); + return -EADDRNOTAVAIL; +} + +static inline u32 inet6_sk_port_offset(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, + np->daddr.s6_addr32, + inet->dport); +} + +int inet6_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + const unsigned short snum = inet_sk(sk)->num; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; + int ret; + + if (snum == 0) { + const int low = sysctl_local_port_range[0]; + const int high = sysctl_local_port_range[1]; + const int range = high - low; + int i, port; + static u32 hint; + const u32 offset = hint + inet6_sk_port_offset(sk); + struct hlist_node *node; + struct inet_timewait_sock *tw = NULL; + + local_bh_disable(); + for (i = 1; i <= range; i++) { + port = low + (i + offset) % range; + head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, + * because the established check is already + * unique enough. + */ + inet_bind_bucket_for_each(tb, node, &head->chain) { + if (tb->port == port) { + BUG_TRAP(!hlist_empty(&tb->owners)); + if (tb->fastreuse >= 0) + goto next_port; + if (!__inet6_check_established(death_row, + sk, port, + &tw)) + goto ok; + goto next_port; + } + } + + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, + head, port); + if (!tb) { + spin_unlock(&head->lock); + break; + } + tb->fastreuse = -1; + goto ok; + + next_port: + spin_unlock(&head->lock); + } + local_bh_enable(); + + return -EADDRNOTAVAIL; + +ok: + hint += i; + + /* Head lock still held and bh's disabled */ + inet_bind_hash(sk, tb, port); + if (sk_unhashed(sk)) { + inet_sk(sk)->sport = htons(port); + __inet6_hash(hinfo, sk); + } + spin_unlock(&head->lock); + + if (tw) { + inet_twsk_deschedule(tw, death_row); + inet_twsk_put(tw); + } + + ret = 0; + goto out; + } + + head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + + if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { + __inet6_hash(hinfo, sk); + spin_unlock_bh(&head->lock); + return 0; + } else { + spin_unlock(&head->lock); + /* No definite answer... Walk to established hash table */ + ret = __inet6_check_established(death_row, sk, snum, NULL); +out: + local_bh_enable(); + return ret; + } +} + +EXPORT_SYMBOL_GPL(inet6_hash_connect); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 514b57bb80b7..a682eb9093e1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -119,177 +119,6 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) } } -static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, - struct inet_timewait_sock **twp) -{ - struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - const struct in6_addr *daddr = &np->rcv_saddr; - const struct in6_addr *saddr = &np->daddr; - const int dif = sk->sk_bound_dev_if; - const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport); - struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); - struct sock *sk2; - const struct hlist_node *node; - struct inet_timewait_sock *tw; - - prefetch(head->chain.first); - write_lock(&head->lock); - - /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); - - tw = inet_twsk(sk2); - - if(*((__u32 *)&(tw->tw_dport)) == ports && - sk2->sk_family == PF_INET6 && - ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { - if (twsk_unique(sk, sk2, twp)) - goto unique; - else - goto not_unique; - } - } - tw = NULL; - - /* And established part... */ - sk_for_each(sk2, node, &head->chain) { - if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) - goto not_unique; - } - -unique: - BUG_TRAP(sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); - sk->sk_hash = hash; - sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); - - if (twp) { - *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - } else if (tw) { - /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, &tcp_death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - - inet_twsk_put(tw); - } - return 0; - -not_unique: - write_unlock(&head->lock); - return -EADDRNOTAVAIL; -} - -static inline u32 tcpv6_port_offset(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - - return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32, - np->daddr.s6_addr32, - inet->dport); -} - -static int tcp_v6_hash_connect(struct sock *sk) -{ - unsigned short snum = inet_sk(sk)->num; - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret; - - if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int range = high - low; - int i; - int port; - static u32 hint; - u32 offset = hint + tcpv6_port_offset(sk); - struct hlist_node *node; - struct inet_timewait_sock *tw = NULL; - - local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; - head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - - /* Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. - */ - inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { - BUG_TRAP(!hlist_empty(&tb->owners)); - if (tb->fastreuse >= 0) - goto next_port; - if (!__tcp_v6_check_established(sk, - port, - &tw)) - goto ok; - goto next_port; - } - } - - tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); - if (!tb) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); - } - local_bh_enable(); - - return -EADDRNOTAVAIL; - -ok: - hint += i; - - /* Head lock still held and bh's disabled */ - inet_bind_hash(sk, tb, port); - if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(port); - __inet6_hash(&tcp_hashinfo, sk); - } - spin_unlock(&head->lock); - - if (tw) { - inet_twsk_deschedule(tw, &tcp_death_row); - inet_twsk_put(tw); - } - - ret = 0; - goto out; - } - - head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - tb = inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - - if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __inet6_hash(&tcp_hashinfo, sk); - spin_unlock_bh(&head->lock); - return 0; - } else { - spin_unlock(&head->lock); - /* No definite answer... Walk to established hash table */ - ret = __tcp_v6_check_established(sk, snum, NULL); -out: - local_bh_enable(); - return ret; - } -} - static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -450,7 +279,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); - err = tcp_v6_hash_connect(sk); + err = inet6_hash_connect(&tcp_death_row, sk); if (err) goto late_failure; -- cgit v1.2.3-70-g09d2 From 22712813620fa8e682dbfb253a60ca0131da1e07 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:25:56 -0800 Subject: [TCP]: Move the TCPF_ enum to tcp_states.h Upcoming patches will make, for instance, ip_sockglue.c need just this enum and not all of tcp.h. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp.h | 16 ---------------- include/net/tcp_states.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4e1434007f44..da38eea1994b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -55,22 +55,6 @@ struct tcphdr { __u16 urg_ptr; }; -#define TCP_ACTION_FIN (1 << 7) - -enum { - TCPF_ESTABLISHED = (1 << 1), - TCPF_SYN_SENT = (1 << 2), - TCPF_SYN_RECV = (1 << 3), - TCPF_FIN_WAIT1 = (1 << 4), - TCPF_FIN_WAIT2 = (1 << 5), - TCPF_TIME_WAIT = (1 << 6), - TCPF_CLOSE = (1 << 7), - TCPF_CLOSE_WAIT = (1 << 8), - TCPF_LAST_ACK = (1 << 9), - TCPF_LISTEN = (1 << 10), - TCPF_CLOSING = (1 << 11) -}; - /* * The union cast uses a gcc extension to avoid aliasing problems * (union is compatible to any of its members) diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h index b9d4176b2d15..b0b645988bd8 100644 --- a/include/net/tcp_states.h +++ b/include/net/tcp_states.h @@ -31,4 +31,20 @@ enum { #define TCP_STATE_MASK 0xF +#define TCP_ACTION_FIN (1 << 7) + +enum { + TCPF_ESTABLISHED = (1 << 1), + TCPF_SYN_SENT = (1 << 2), + TCPF_SYN_RECV = (1 << 3), + TCPF_FIN_WAIT1 = (1 << 4), + TCPF_FIN_WAIT2 = (1 << 5), + TCPF_TIME_WAIT = (1 << 6), + TCPF_CLOSE = (1 << 7), + TCPF_CLOSE_WAIT = (1 << 8), + TCPF_LAST_ACK = (1 << 9), + TCPF_LISTEN = (1 << 10), + TCPF_CLOSING = (1 << 11) +}; + #endif /* _LINUX_TCP_STATES_H */ -- cgit v1.2.3-70-g09d2 From d83d8461f902c672bc1bd8fbc6a94e19f092da97 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:26:10 -0800 Subject: [IP_SOCKGLUE]: Remove most of the tcp specific calls As DCCP needs to be called in the same spots. Now we have a member in inet_sock (is_icsk), set at sock creation time from struct inet_protosw->flags (if INET_PROTOSW_ICSK is set, like for TCP and DCCP) to see if a struct sock instance is a inet_connection_sock for places like the ones in ip_sockglue.c (v4 and v6) where we previously were looking if sk_type was SOCK_STREAM, that is insufficient because we now use the same code for DCCP, that has sk_type SOCK_DCCP. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 4 ---- include/linux/ip.h | 1 + include/linux/tcp.h | 3 +-- include/net/inet_connection_sock.h | 6 +++++- include/net/protocol.h | 1 + net/dccp/diag.c | 2 +- net/dccp/input.c | 2 +- net/dccp/ipv4.c | 12 +++++++----- net/dccp/ipv6.c | 26 ++++++++++++++------------ net/dccp/output.c | 7 ++++--- net/dccp/proto.c | 2 +- net/ipv4/af_inet.c | 4 +++- net/ipv4/ip_sockglue.c | 13 ++++++------- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 10 ++++------ net/ipv4/tcp_ipv4.c | 12 ++++++------ net/ipv4/tcp_output.c | 18 ++++++++++-------- net/ipv6/af_inet6.c | 1 + net/ipv6/ipv6_sockglue.c | 24 +++++++++++++----------- net/ipv6/tcp_ipv6.c | 30 +++++++++++++++++------------- 20 files changed, 97 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 71fab4311e92..d0bdb499cf8d 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -408,8 +408,6 @@ struct dccp_ackvec; * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss * @dccps_timestamp_time - time of latest TIMESTAMP option * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option - * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options) - * @dccps_pmtu_cookie - Last pmtu seen by socket * @dccps_packet_size - Set thru setsockopt * @dccps_role - Role of this sock, one of %dccp_role * @dccps_ndp_count - number of Non Data Packets since last data packet @@ -434,8 +432,6 @@ struct dccp_sock { __u32 dccps_timestamp_echo; __u32 dccps_packet_size; unsigned long dccps_ndp_count; - __u16 dccps_ext_header_len; - __u32 dccps_pmtu_cookie; __u32 dccps_mss_cache; struct dccp_options dccps_options; struct dccp_ackvec *dccps_hc_rx_ackvec; diff --git a/include/linux/ip.h b/include/linux/ip.h index 5a560daeade5..6ccc596c19c8 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -155,6 +155,7 @@ struct inet_sock { __u8 mc_ttl; /* Multicasting TTL */ __u8 pmtudisc; unsigned recverr : 1, + is_icsk : 1, /* inet_connection_sock? */ freebind : 1, hdrincl : 1, mc_loop : 1; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index da38eea1994b..f2bb2396853f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -238,10 +238,9 @@ struct tcp_sock { __u32 snd_wl1; /* Sequence for window update */ __u32 snd_wnd; /* The window we expect to receive */ __u32 max_window; /* Maximal window ever seen from peer */ - __u32 pmtu_cookie; /* Last pmtu seen by socket */ __u32 mss_cache; /* Cached effective mss, not including SACKS */ __u16 xmit_size_goal; /* Goal for segmenting output packets */ - __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ + /* XXX Two bytes hole, try to pack */ __u32 window_clamp; /* Maximal window to advertise */ __u32 rcv_ssthresh; /* Current window clamp */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index e50e2b890c6d..91888967d3e3 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -60,6 +60,7 @@ struct inet_connection_sock_af_ops { * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: Retransmit timeout + * @icsk_pmtu_cookie Last pmtu seen by socket * @icsk_ca_ops Pluggable congestion control hook * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ca_state: Congestion control state @@ -68,6 +69,7 @@ struct inet_connection_sock_af_ops { * @icsk_backoff: Backoff * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries * @icsk_probes_out: unanswered 0 window probes + * @icsk_ext_hdr_len: Network protocol overhead (IP/IPv6 options) * @icsk_ack: Delayed ACK control data */ struct inet_connection_sock { @@ -79,15 +81,17 @@ struct inet_connection_sock { struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; __u32 icsk_rto; + __u32 icsk_pmtu_cookie; struct tcp_congestion_ops *icsk_ca_ops; struct inet_connection_sock_af_ops *icsk_af_ops; + unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state; __u8 icsk_retransmits; __u8 icsk_pending; __u8 icsk_backoff; __u8 icsk_syn_retries; __u8 icsk_probes_out; - /* 2 BYTES HOLE, TRY TO PACK! */ + __u16 icsk_ext_hdr_len; struct { __u8 pending; /* ACK is pending */ __u8 quick; /* Scheduled number of quick acks */ diff --git a/include/net/protocol.h b/include/net/protocol.h index 357691f6a45f..a29cb29647d0 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -76,6 +76,7 @@ struct inet_protosw { }; #define INET_PROTOSW_REUSE 0x01 /* Are ports automatically reusable? */ #define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */ +#define INET_PROTOSW_ICSK 0x04 /* Is this an inet_connection_sock? */ extern struct net_protocol *inet_protocol_base; extern struct net_protocol *inet_protos[MAX_INET_PROTOS]; diff --git a/net/dccp/diag.c b/net/dccp/diag.c index f675d8e642d3..3f78c00e3822 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -28,7 +28,7 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_retransmits = icsk->icsk_retransmits; info->tcpi_probes = icsk->icsk_probes_out; info->tcpi_backoff = icsk->icsk_backoff; - info->tcpi_pmtu = dp->dccps_pmtu_cookie; + info->tcpi_pmtu = icsk->icsk_pmtu_cookie; if (dp->dccps_options.dccpo_send_ack_vector) info->tcpi_options |= TCPI_OPT_SACK; diff --git a/net/dccp/input.c b/net/dccp/input.c index 9a724ff2a622..55e921bdd131 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -311,7 +311,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, goto out_invalid_packet; } - dccp_sync_mss(sk, dp->dccps_pmtu_cookie); + dccp_sync_mss(sk, icsk->icsk_pmtu_cookie); /* * Step 10: Process REQUEST state (second part) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 671fbf3b2379..c363051a7f16 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -104,9 +104,9 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->dport = usin->sin_port; inet->daddr = daddr; - dp->dccps_ext_header_len = 0; + inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt != NULL) - dp->dccps_ext_header_len = inet->opt->optlen; + inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; /* * Socket identity is still unknown (sport may be zero). * However we set state to DCCP_REQUESTING and not releasing socket @@ -191,7 +191,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, mtu = dst_mtu(dst); if (inet->pmtudisc != IP_PMTUDISC_DONT && - dp->dccps_pmtu_cookie > mtu) { + inet_csk(sk)->icsk_pmtu_cookie > mtu) { dccp_sync_mss(sk, mtu); /* @@ -1051,6 +1051,7 @@ struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { int dccp_v4_init_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); static int dccp_ctl_socket_init = 1; dccp_options_init(&dp->dccps_options); @@ -1090,10 +1091,11 @@ int dccp_v4_init_sock(struct sock *sk) dccp_ctl_socket_init = 0; dccp_init_xmit_timers(sk); - inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT; + icsk->icsk_rto = DCCP_TIMEOUT_INIT; sk->sk_state = DCCP_CLOSED; sk->sk_write_space = dccp_write_space; - inet_csk(sk)->icsk_af_ops = &dccp_ipv4_af_ops; + icsk->icsk_af_ops = &dccp_ipv4_af_ops; + icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 71bf04eb21e1..599b0be21515 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -88,6 +88,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; + struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); @@ -158,7 +159,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, */ if (addr_type == IPV6_ADDR_MAPPED) { - u32 exthdrlen = dp->dccps_ext_header_len; + u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); @@ -170,14 +171,14 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - inet_csk(sk)->icsk_af_ops = &dccp_ipv6_mapped; + icsk->icsk_af_ops = &dccp_ipv6_mapped; sk->sk_backlog_rcv = dccp_v4_do_rcv; err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { - dp->dccps_ext_header_len = exthdrlen; - inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops; + icsk->icsk_ext_hdr_len = exthdrlen; + icsk->icsk_af_ops = &dccp_ipv6_af_ops; sk->sk_backlog_rcv = dccp_v6_do_rcv; goto failure; } else { @@ -227,9 +228,10 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ip6_dst_store(sk, dst, NULL); - dp->dccps_ext_header_len = 0; + icsk->icsk_ext_hdr_len = 0; if (np->opt) - dp->dccps_ext_header_len = np->opt->opt_flen + np->opt->opt_nflen; + icsk->icsk_ext_hdr_len = (np->opt->opt_flen + + np->opt->opt_nflen); inet->dport = usin->sin6_port; @@ -292,7 +294,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { - struct dccp_sock *dp = dccp_sk(sk); struct dst_entry *dst = NULL; if (sock_owned_by_user(sk)) @@ -332,7 +333,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); - if (dp->dccps_pmtu_cookie > dst_mtu(dst)) { + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { dccp_sync_mss(sk, dst_mtu(dst)); } /* else let the usual retransmit timer handle it */ dst_release(dst); @@ -808,7 +809,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, worked with IPv6 icsk.icsk_af_ops. Sync it now. */ - dccp_sync_mss(newsk, newdp->dccps_pmtu_cookie); + dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } @@ -916,10 +917,10 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, sock_kfree_s(sk, opt, opt->tot_len); } - newdp->dccps_ext_header_len = 0; + inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newnp->opt) - newdp->dccps_ext_header_len = newnp->opt->opt_nflen + - newnp->opt->opt_flen; + inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + + newnp->opt->opt_flen); dccp_sync_mss(newsk, dst_mtu(dst)); @@ -1230,6 +1231,7 @@ static struct inet_protosw dccp_v6_protosw = { .prot = &dccp_v6_prot, .ops = &inet6_dccp_ops, .capability = -1, + .flags = INET_PROTOSW_ICSK, }; static int __init dccp_v6_init(void) diff --git a/net/dccp/output.c b/net/dccp/output.c index c40f7f8a328b..95a3c2c6a3ce 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -134,12 +134,13 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) { + struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); - int mss_now = (pmtu - inet_csk(sk)->icsk_af_ops->net_header_len - + int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext)); /* Now subtract optional transport overhead */ - mss_now -= dp->dccps_ext_header_len; + mss_now -= icsk->icsk_ext_hdr_len; /* * FIXME: this should come from the CCID infrastructure, where, say, @@ -152,7 +153,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; /* And store cached results */ - dp->dccps_pmtu_cookie = pmtu; + icsk->icsk_pmtu_cookie = pmtu; dp->dccps_mss_cache = mss_now; return mss_now; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 51dfacd22a6e..40a4c6899051 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -712,7 +712,7 @@ static struct inet_protosw dccp_v4_protosw = { .ops = &inet_dccp_ops, .capability = -1, .no_check = 0, - .flags = 0, + .flags = INET_PROTOSW_ICSK, }; /* diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d368cf249000..617e858beff1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -302,6 +302,7 @@ lookup_protocol: sk->sk_reuse = 1; inet = inet_sk(sk); + inet->is_icsk = INET_PROTOSW_ICSK & answer_flags; if (SOCK_RAW == sock->type) { inet->num = protocol; @@ -869,7 +870,8 @@ static struct inet_protosw inetsw_array[] = .ops = &inet_stream_ops, .capability = -1, .no_check = 0, - .flags = INET_PROTOSW_PERMANENT, + .flags = INET_PROTOSW_PERMANENT | + INET_PROTOSW_ICSK, }, { diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 4f2d87257309..add019c746f8 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -29,8 +29,7 @@ #include #include #include -#include -#include +#include #include #include #include @@ -427,8 +426,8 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, err = ip_options_get_from_user(&opt, optval, optlen); if (err) break; - if (sk->sk_type == SOCK_STREAM) { - struct tcp_sock *tp = tcp_sk(sk); + if (inet->is_icsk) { + struct inet_connection_sock *icsk = inet_csk(sk); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (sk->sk_family == PF_INET || (!((1 << sk->sk_state) & @@ -436,10 +435,10 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, inet->daddr != LOOPBACK4_IPV6)) { #endif if (inet->opt) - tp->ext_header_len -= inet->opt->optlen; + icsk->icsk_ext_hdr_len -= inet->opt->optlen; if (opt) - tp->ext_header_len += opt->optlen; - tcp_sync_mss(sk, tp->pmtu_cookie); + icsk->icsk_ext_hdr_len += opt->optlen; + icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) } #endif diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eacfe6a3442c..00aa80e93243 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1914,7 +1914,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); - info->tcpi_pmtu = tp->pmtu_cookie; + info->tcpi_pmtu = icsk->icsk_pmtu_cookie; info->tcpi_rcv_ssthresh = tp->rcv_ssthresh; info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3; info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7de6184d4bd8..981d1203b152 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2342,7 +2342,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp, if (nwin > tp->max_window) { tp->max_window = nwin; - tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie); } } } @@ -3967,12 +3967,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); int saved_clamp = tp->rx_opt.mss_clamp; tcp_parse_options(skb, &tp->rx_opt, 0); if (th->ack) { - struct inet_connection_sock *icsk; /* rfc793: * "If the state is SYN-SENT then * first check the ACK bit @@ -4061,7 +4061,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.sack_ok && sysctl_tcp_fack) tp->rx_opt.sack_ok |= 2; - tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); /* Remember, tcp_poll() does not lock socket! @@ -4071,8 +4071,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, mb(); tcp_set_state(sk, TCP_ESTABLISHED); - icsk = inet_csk(sk); - /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); @@ -4173,7 +4171,7 @@ discard: if (tp->ecn_flags&TCP_ECN_OK) sock_set_flag(sk, SOCK_NO_LARGESEND); - tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c2fe61becd61..9b62d80bb20f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -220,9 +220,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->dport = usin->sin_port; inet->daddr = daddr; - tp->ext_header_len = 0; + inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt) - tp->ext_header_len = inet->opt->optlen; + inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; tp->rx_opt.mss_clamp = 536; @@ -275,7 +275,6 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, { struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs * send out by Linux are always <576bytes so they should go through @@ -304,7 +303,7 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, mtu = dst_mtu(dst); if (inet->pmtudisc != IP_PMTUDISC_DONT && - tp->pmtu_cookie > mtu) { + inet_csk(sk)->icsk_pmtu_cookie > mtu) { tcp_sync_mss(sk, mtu); /* Resend the TCP packet because it's @@ -895,9 +894,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ireq->opt = NULL; newinet->mc_index = inet_iif(skb); newinet->mc_ttl = skb->nh.iph->ttl; - newtp->ext_header_len = 0; + inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newinet->opt) - newtp->ext_header_len = newinet->opt->optlen; + inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; newinet->id = newtp->write_seq ^ jiffies; tcp_sync_mss(newsk, dst_mtu(dst)); @@ -1266,6 +1265,7 @@ static int tcp_v4_init_sock(struct sock *sk) sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); icsk->icsk_af_ops = &ipv4_specific; + icsk->icsk_sync_mss = tcp_sync_mss; sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index af1946c52c37..3a0a914de917 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -621,7 +621,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) It is minimum of user_mss and mss received with SYN. It also does not include TCP options. - tp->pmtu_cookie is last pmtu, seen by this function. + inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function. tp->mss_cache is current effective sending mss, including all tcp options except for SACKs. It is evaluated, @@ -631,17 +631,18 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) NOTE1. rfc1122 clearly states that advertised MSS DOES NOT include either tcp or ip options. - NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside - this function. --ANK (980731) + NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache + are READ ONLY outside this function. --ANK (980731) */ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); /* Calculate base mss without TCP options: It is MMS_S - sizeof(tcphdr) of rfc1122 */ - int mss_now = (pmtu - inet_csk(sk)->icsk_af_ops->net_header_len - + int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr)); /* Clamp it (mss_clamp does not include tcp options) */ @@ -649,7 +650,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) mss_now = tp->rx_opt.mss_clamp; /* Now subtract optional transport overhead */ - mss_now -= tp->ext_header_len; + mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ if (mss_now < 48) @@ -663,7 +664,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len); /* And store cached results */ - tp->pmtu_cookie = pmtu; + icsk->icsk_pmtu_cookie = pmtu; tp->mss_cache = mss_now; return mss_now; @@ -693,7 +694,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) if (dst) { u32 mtu = dst_mtu(dst); - if (mtu != tp->pmtu_cookie) + if (mtu != inet_csk(sk)->icsk_pmtu_cookie) mss_now = tcp_sync_mss(sk, mtu); } @@ -706,7 +707,8 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) if (doing_tso) { xmit_size_goal = (65535 - inet_csk(sk)->icsk_af_ops->net_header_len - - tp->ext_header_len - tp->tcp_header_len); + inet_csk(sk)->icsk_ext_hdr_len - + tp->tcp_header_len); if (tp->max_window && (xmit_size_goal > (tp->max_window >> 1))) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index bf17aab9b776..70a510ff31ee 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -167,6 +167,7 @@ lookup_protocol: sk->sk_reuse = 1; inet = inet_sk(sk); + inet->is_icsk = INET_PROTOSW_ICSK & answer_flags; if (SOCK_RAW == sock->type) { inet->num = protocol; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index b6b63fa8454c..c63868dd2ca2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -163,17 +163,17 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, sk_refcnt_debug_dec(sk); if (sk->sk_protocol == IPPROTO_TCP) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); local_bh_disable(); sock_prot_dec_use(sk->sk_prot); sock_prot_inc_use(&tcp_prot); local_bh_enable(); sk->sk_prot = &tcp_prot; - inet_csk(sk)->icsk_af_ops = &ipv4_specific; + icsk->icsk_af_ops = &ipv4_specific; sk->sk_socket->ops = &inet_stream_ops; sk->sk_family = PF_INET; - tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { local_bh_disable(); sock_prot_dec_use(sk->sk_prot); @@ -317,14 +317,15 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, } retv = 0; - if (sk->sk_type == SOCK_STREAM) { + if (inet_sk(sk)->is_icsk) { if (opt) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { - tp->ext_header_len = opt->opt_flen + opt->opt_nflen; - tcp_sync_mss(sk, tp->pmtu_cookie); + icsk->icsk_ext_hdr_len = + opt->opt_flen + opt->opt_nflen; + icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } opt = xchg(&np->opt, opt); @@ -380,14 +381,15 @@ sticky_done: goto done; update: retv = 0; - if (sk->sk_type == SOCK_STREAM) { + if (inet_sk(sk)->is_icsk) { if (opt) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { - tp->ext_header_len = opt->opt_flen + opt->opt_nflen; - tcp_sync_mss(sk, tp->pmtu_cookie); + icsk->icsk_ext_hdr_len = + opt->opt_flen + opt->opt_nflen; + icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } opt = xchg(&np->opt, opt); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a682eb9093e1..2947bc56d8a0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -123,7 +123,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; - struct inet_sock *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p = NULL, final; @@ -198,7 +199,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, */ if (addr_type == IPV6_ADDR_MAPPED) { - u32 exthdrlen = tp->ext_header_len; + u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); @@ -210,14 +211,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - inet_csk(sk)->icsk_af_ops = &ipv6_mapped; + icsk->icsk_af_ops = &ipv6_mapped; sk->sk_backlog_rcv = tcp_v4_do_rcv; err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { - tp->ext_header_len = exthdrlen; - inet_csk(sk)->icsk_af_ops = &ipv6_specific; + icsk->icsk_ext_hdr_len = exthdrlen; + icsk->icsk_af_ops = &ipv6_specific; sk->sk_backlog_rcv = tcp_v6_do_rcv; goto failure; } else { @@ -270,9 +271,10 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); - tp->ext_header_len = 0; + icsk->icsk_ext_hdr_len = 0; if (np->opt) - tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen; + icsk->icsk_ext_hdr_len = (np->opt->opt_flen + + np->opt->opt_nflen); tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); @@ -385,7 +387,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); - if (tp->pmtu_cookie > dst_mtu(dst)) { + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ @@ -869,7 +871,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, worked with IPv6 icsk.icsk_af_ops. Sync it now. */ - tcp_sync_mss(newsk, newtp->pmtu_cookie); + tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } @@ -976,10 +978,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, sock_kfree_s(sk, opt, opt->tot_len); } - newtp->ext_header_len = 0; + inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newnp->opt) - newtp->ext_header_len = newnp->opt->opt_nflen + - newnp->opt->opt_flen; + inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + + newnp->opt->opt_flen); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric(dst, RTAX_ADVMSS); @@ -1361,6 +1363,7 @@ static int tcp_v6_init_sock(struct sock *sk) icsk->icsk_af_ops = &ipv6_specific; icsk->icsk_ca_ops = &tcp_init_congestion_ops; + icsk->icsk_sync_mss = tcp_sync_mss; sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); @@ -1591,7 +1594,8 @@ static struct inet_protosw tcpv6_protosw = { .ops = &inet6_stream_ops, .capability = -1, .no_check = 0, - .flags = INET_PROTOSW_PERMANENT, + .flags = INET_PROTOSW_PERMANENT | + INET_PROTOSW_ICSK, }; void __init tcpv6_init(void) -- cgit v1.2.3-70-g09d2 From fbe9cc4a87030d5cad5f944ffaef6af7efd119e4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 13 Dec 2005 23:26:29 -0800 Subject: [AF_UNIX]: Use spinlock for unix_table_lock This lock is actually taken mostly as a writer, so using a rwlock actually just makes performance worse especially on chips like the Intel P4. Signed-off-by: David S. Miller --- include/net/af_unix.h | 2 +- net/unix/af_unix.c | 34 +++++++++++++++++----------------- net/unix/garbage.c | 4 ++-- 3 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b5d785ab4a0e..3f302ae98c03 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -13,7 +13,7 @@ extern void unix_gc(void); #define UNIX_HASH_SIZE 256 extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; -extern rwlock_t unix_table_lock; +extern spinlock_t unix_table_lock; extern atomic_t unix_tot_inflight; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1dc3685048f3..04e850e04e3d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -121,7 +121,7 @@ int sysctl_unix_max_dgram_qlen = 10; struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; -DEFINE_RWLOCK(unix_table_lock); +DEFINE_SPINLOCK(unix_table_lock); static atomic_t unix_nr_socks = ATOMIC_INIT(0); #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) @@ -130,7 +130,7 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0); /* * SMP locking strategy: - * hash table is protected with rwlock unix_table_lock + * hash table is protected with spinlock unix_table_lock * each socket state is protected by separate rwlock. */ @@ -214,16 +214,16 @@ static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) static inline void unix_remove_socket(struct sock *sk) { - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); __unix_remove_socket(sk); - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); } static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) { - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); __unix_insert_socket(list, sk); - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); } static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, @@ -250,11 +250,11 @@ static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, { struct sock *s; - read_lock(&unix_table_lock); + spin_lock(&unix_table_lock); s = __unix_find_socket_byname(sunname, len, type, hash); if (s) sock_hold(s); - read_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); return s; } @@ -263,7 +263,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) struct sock *s; struct hlist_node *node; - read_lock(&unix_table_lock); + spin_lock(&unix_table_lock); sk_for_each(s, node, &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->dentry; @@ -276,7 +276,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) } s = NULL; found: - read_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); return s; } @@ -642,12 +642,12 @@ retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0)); - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; if (__unix_find_socket_byname(addr->name, addr->len, sock->type, addr->hash)) { - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); /* Sanity yield. It is unusual case, but yet... */ if (!(ordernum&0xFF)) yield(); @@ -658,7 +658,7 @@ retry: __unix_remove_socket(sk); u->addr = addr; __unix_insert_socket(&unix_socket_table[addr->hash], sk); - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); err = 0; out: up(&u->readsem); @@ -791,7 +791,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) addr->hash = UNIX_HASH_SIZE; } - write_lock(&unix_table_lock); + spin_lock(&unix_table_lock); if (!sunaddr->sun_path[0]) { err = -EADDRINUSE; @@ -814,7 +814,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) __unix_insert_socket(list, sk); out_unlock: - write_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); out_up: up(&u->readsem); out: @@ -1916,7 +1916,7 @@ static struct sock *unix_seq_idx(int *iter, loff_t pos) static void *unix_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock(&unix_table_lock); + spin_lock(&unix_table_lock); return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); } @@ -1931,7 +1931,7 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void unix_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); } static int unix_seq_show(struct seq_file *seq, void *v) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 6ffc64e1712d..411802bd4d37 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -182,7 +182,7 @@ void unix_gc(void) if (down_trylock(&unix_gc_sem)) return; - read_lock(&unix_table_lock); + spin_lock(&unix_table_lock); forall_unix_sockets(i, s) { @@ -301,7 +301,7 @@ void unix_gc(void) } u->gc_tree = GC_ORPHAN; } - read_unlock(&unix_table_lock); + spin_unlock(&unix_table_lock); /* * Here we are. Hitlist is filled. Die. -- cgit v1.2.3-70-g09d2 From c865e5d99e25a171e8262fc0f7ba608568633c64 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 21 Dec 2005 19:03:44 -0800 Subject: [PKT_SCHED] netem: packet corruption option Here is a new feature for netem in 2.6.16. It adds the ability to randomly corrupt packets with netem. A version was done by Hagen Paul Pfeifer, but I redid it to handle the cases of backwards compatibility with netlink interface and presence of hardware checksum offload. It is useful for testing hardware offload in devices. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 7 +++++++ net/sched/sch_netem.c | 49 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index e87b233615b3..d10f35338507 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -429,6 +429,7 @@ enum TCA_NETEM_CORR, TCA_NETEM_DELAY_DIST, TCA_NETEM_REORDER, + TCA_NETEM_CORRUPT, __TCA_NETEM_MAX, }; @@ -457,6 +458,12 @@ struct tc_netem_reorder __u32 correlation; }; +struct tc_netem_corrupt +{ + __u32 probability; + __u32 correlation; +}; + #define NETEM_DIST_SCALE 8192 #endif diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 82fb07aa06a5..ba5283204837 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -25,7 +25,7 @@ #include -#define VERSION "1.1" +#define VERSION "1.2" /* Network Emulation Queuing algorithm. ==================================== @@ -65,11 +65,12 @@ struct netem_sched_data { u32 jitter; u32 duplicate; u32 reorder; + u32 corrupt; struct crndstate { unsigned long last; unsigned long rho; - } delay_cor, loss_cor, dup_cor, reorder_cor; + } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; struct disttable { u32 size; @@ -183,6 +184,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->duplicate = dupsave; } + /* + * Randomized packet corruption. + * Make copy if needed since we are modifying + * If packet is going to be hardware checksummed, then + * do it now in software before we mangle it. + */ + if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { + if (!(skb = skb_unshare(skb, GFP_ATOMIC)) + || (skb->ip_summed == CHECKSUM_HW + && skb_checksum_help(skb, 0))) { + sch->qstats.drops++; + return NET_XMIT_DROP; + } + + skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); + } + if (q->gap == 0 /* not doing reordering */ || q->counter < q->gap /* inside last reordering gap */ || q->reorder < get_crandom(&q->reorder_cor)) { @@ -382,6 +400,20 @@ static int get_reorder(struct Qdisc *sch, const struct rtattr *attr) return 0; } +static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr) +{ + struct netem_sched_data *q = qdisc_priv(sch); + const struct tc_netem_corrupt *r = RTA_DATA(attr); + + if (RTA_PAYLOAD(attr) != sizeof(*r)) + return -EINVAL; + + q->corrupt = r->probability; + init_crandom(&q->corrupt_cor, r->correlation); + return 0; +} + +/* Parse netlink message to set options */ static int netem_change(struct Qdisc *sch, struct rtattr *opt) { struct netem_sched_data *q = qdisc_priv(sch); @@ -432,13 +464,19 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) if (ret) return ret; } + if (tb[TCA_NETEM_REORDER-1]) { ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]); if (ret) return ret; } - } + if (tb[TCA_NETEM_CORRUPT-1]) { + ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]); + if (ret) + return ret; + } + } return 0; } @@ -564,6 +602,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_netem_qopt qopt; struct tc_netem_corr cor; struct tc_netem_reorder reorder; + struct tc_netem_corrupt corrupt; qopt.latency = q->latency; qopt.jitter = q->jitter; @@ -582,6 +621,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) reorder.correlation = q->reorder_cor.rho; RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder); + corrupt.probability = q->corrupt; + corrupt.correlation = q->corrupt_cor.rho; + RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); + rta->rta_len = skb->tail - b; return skb->len; -- cgit v1.2.3-70-g09d2 From 3821af2fe13700cab6fd67367128fa180e43f8b8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 21 Dec 2005 19:30:53 -0800 Subject: [FLS64]: generic version Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/asm-alpha/bitops.h | 1 + include/asm-arm/bitops.h | 2 ++ include/asm-arm26/bitops.h | 1 + include/asm-cris/bitops.h | 1 + include/asm-frv/bitops.h | 1 + include/asm-generic/bitops.h | 1 + include/asm-h8300/bitops.h | 1 + include/asm-i386/bitops.h | 1 + include/asm-ia64/bitops.h | 1 + include/asm-m32r/bitops.h | 1 + include/asm-m68k/bitops.h | 1 + include/asm-m68knommu/bitops.h | 1 + include/asm-mips/bitops.h | 2 +- include/asm-parisc/bitops.h | 1 + include/asm-powerpc/bitops.h | 1 + include/asm-s390/bitops.h | 1 + include/asm-sh/bitops.h | 1 + include/asm-sh64/bitops.h | 1 + include/asm-sparc/bitops.h | 1 + include/asm-sparc64/bitops.h | 1 + include/asm-v850/bitops.h | 1 + include/asm-x86_64/bitops.h | 1 + include/asm-xtensa/bitops.h | 1 + include/linux/bitops.h | 9 +++++++++ 24 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-alpha/bitops.h b/include/asm-alpha/bitops.h index 578ed3f1a607..302201f1a097 100644 --- a/include/asm-alpha/bitops.h +++ b/include/asm-alpha/bitops.h @@ -321,6 +321,7 @@ static inline int fls(int word) #else #define fls generic_fls #endif +#define fls64 generic_fls64 /* Compute powers of two for the given integer. */ static inline long floor_log2(unsigned long word) diff --git a/include/asm-arm/bitops.h b/include/asm-arm/bitops.h index 7399d431edfe..d02de721ecc1 100644 --- a/include/asm-arm/bitops.h +++ b/include/asm-arm/bitops.h @@ -332,6 +332,7 @@ static inline unsigned long __ffs(unsigned long word) */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) /* * ffs: find first bit set. This is defined the same way as @@ -351,6 +352,7 @@ static inline unsigned long __ffs(unsigned long word) #define fls(x) \ ( __builtin_constant_p(x) ? generic_fls(x) : \ ({ int __r; asm("clz\t%0, %1" : "=r"(__r) : "r"(x) : "cc"); 32-__r; }) ) +#define fls64(x) generic_fls64(x) #define ffs(x) ({ unsigned long __t = (x); fls(__t & -__t); }) #define __ffs(x) (ffs(x) - 1) #define ffz(x) __ffs( ~(x) ) diff --git a/include/asm-arm26/bitops.h b/include/asm-arm26/bitops.h index 7d062fb2e343..15cc6f2da792 100644 --- a/include/asm-arm26/bitops.h +++ b/include/asm-arm26/bitops.h @@ -259,6 +259,7 @@ static inline unsigned long __ffs(unsigned long word) */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) /* * ffs: find first bit set. This is defined the same way as diff --git a/include/asm-cris/bitops.h b/include/asm-cris/bitops.h index 1bddb3f3a289..d3eb0f1e4208 100644 --- a/include/asm-cris/bitops.h +++ b/include/asm-cris/bitops.h @@ -240,6 +240,7 @@ static inline int test_bit(int nr, const volatile unsigned long *addr) */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) /* * hweightN - returns the hamming weight of a N-bit word diff --git a/include/asm-frv/bitops.h b/include/asm-frv/bitops.h index b664bd5b6663..02be7b3a8a83 100644 --- a/include/asm-frv/bitops.h +++ b/include/asm-frv/bitops.h @@ -228,6 +228,7 @@ found_middle: \ bit ? 33 - bit : bit; \ }) +#define fls64(x) generic_fls64(x) /* * Every architecture must define this function. It's the fastest diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h index ce31b739fd80..0e6d9852008c 100644 --- a/include/asm-generic/bitops.h +++ b/include/asm-generic/bitops.h @@ -56,6 +56,7 @@ extern __inline__ int test_bit(int nr, const unsigned long * addr) */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #ifdef __KERNEL__ diff --git a/include/asm-h8300/bitops.h b/include/asm-h8300/bitops.h index 5036f595f8c9..c0411ec9d651 100644 --- a/include/asm-h8300/bitops.h +++ b/include/asm-h8300/bitops.h @@ -406,5 +406,6 @@ found_middle: #endif /* __KERNEL__ */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #endif /* _H8300_BITOPS_H */ diff --git a/include/asm-i386/bitops.h b/include/asm-i386/bitops.h index ddf1739dc7fd..4807aa1d2e3d 100644 --- a/include/asm-i386/bitops.h +++ b/include/asm-i386/bitops.h @@ -372,6 +372,7 @@ static inline unsigned long ffz(unsigned long word) */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #ifdef __KERNEL__ diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h index 7232528e2d0c..36d0fb95ea89 100644 --- a/include/asm-ia64/bitops.h +++ b/include/asm-ia64/bitops.h @@ -345,6 +345,7 @@ fls (int t) x |= x >> 16; return ia64_popcnt(x); } +#define fls64(x) generic_fls64(x) /* * ffs: find first bit set. This is defined the same way as the libc and compiler builtin diff --git a/include/asm-m32r/bitops.h b/include/asm-m32r/bitops.h index e78443981349..abea2fdd8689 100644 --- a/include/asm-m32r/bitops.h +++ b/include/asm-m32r/bitops.h @@ -465,6 +465,7 @@ static __inline__ unsigned long __ffs(unsigned long word) * fls: find last bit set. */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #ifdef __KERNEL__ diff --git a/include/asm-m68k/bitops.h b/include/asm-m68k/bitops.h index b1bcf7c66516..13f4c0048463 100644 --- a/include/asm-m68k/bitops.h +++ b/include/asm-m68k/bitops.h @@ -310,6 +310,7 @@ static inline int fls(int x) return 32 - cnt; } +#define fls64(x) generic_fls64(x) /* * Every architecture must define this function. It's the fastest diff --git a/include/asm-m68knommu/bitops.h b/include/asm-m68knommu/bitops.h index c42f88a9b9f9..4058dd086a02 100644 --- a/include/asm-m68knommu/bitops.h +++ b/include/asm-m68knommu/bitops.h @@ -499,5 +499,6 @@ found_middle: * fls: find last bit set. */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #endif /* _M68KNOMMU_BITOPS_H */ diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h index 5496f9064a6a..3b0c8aaf6e8b 100644 --- a/include/asm-mips/bitops.h +++ b/include/asm-mips/bitops.h @@ -695,7 +695,7 @@ static inline unsigned long fls(unsigned long word) return flz(~word) + 1; } - +#define fls64(x) generic_fls64(x) /* * find_next_zero_bit - find the first zero bit in a memory region diff --git a/include/asm-parisc/bitops.h b/include/asm-parisc/bitops.h index 55b98c67fd82..15d8c2b51584 100644 --- a/include/asm-parisc/bitops.h +++ b/include/asm-parisc/bitops.h @@ -263,6 +263,7 @@ static __inline__ int fls(int x) return ret; } +#define fls64(x) generic_fls64(x) /* * hweightN: returns the hamming weight (i.e. the number diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h index 5727229b0444..1996eaa8aeae 100644 --- a/include/asm-powerpc/bitops.h +++ b/include/asm-powerpc/bitops.h @@ -310,6 +310,7 @@ static __inline__ int fls(unsigned int x) asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x)); return 32 - lz; } +#define fls64(x) generic_fls64(x) /* * hweightN: returns the hamming weight (i.e. the number diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h index b07c578b22ea..61232760cc3b 100644 --- a/include/asm-s390/bitops.h +++ b/include/asm-s390/bitops.h @@ -839,6 +839,7 @@ static inline int sched_find_first_bit(unsigned long *b) * fls: find last bit set. */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) /* * hweightN: returns the hamming weight (i.e. the number diff --git a/include/asm-sh/bitops.h b/include/asm-sh/bitops.h index 5163d1ff2f1b..1c5260860045 100644 --- a/include/asm-sh/bitops.h +++ b/include/asm-sh/bitops.h @@ -470,6 +470,7 @@ found_middle: */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #endif /* __KERNEL__ */ diff --git a/include/asm-sh64/bitops.h b/include/asm-sh64/bitops.h index e1ff63e09227..ce9c3ad45fe0 100644 --- a/include/asm-sh64/bitops.h +++ b/include/asm-sh64/bitops.h @@ -510,6 +510,7 @@ found_middle: #define ffs(x) generic_ffs(x) #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #endif /* __KERNEL__ */ diff --git a/include/asm-sparc/bitops.h b/include/asm-sparc/bitops.h index bfbd795a0a80..41722b5e45ef 100644 --- a/include/asm-sparc/bitops.h +++ b/include/asm-sparc/bitops.h @@ -298,6 +298,7 @@ static inline int ffs(int x) * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) /* * hweightN: returns the hamming weight (i.e. the number diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h index 6388b8376c50..6efc0162fb09 100644 --- a/include/asm-sparc64/bitops.h +++ b/include/asm-sparc64/bitops.h @@ -119,6 +119,7 @@ static inline unsigned long __ffs(unsigned long word) */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #ifdef __KERNEL__ diff --git a/include/asm-v850/bitops.h b/include/asm-v850/bitops.h index b91e799763fd..8955d2376ac8 100644 --- a/include/asm-v850/bitops.h +++ b/include/asm-v850/bitops.h @@ -276,6 +276,7 @@ found_middle: #define ffs(x) generic_ffs (x) #define fls(x) generic_fls (x) +#define fls64(x) generic_fls64(x) #define __ffs(x) ffs(x) diff --git a/include/asm-x86_64/bitops.h b/include/asm-x86_64/bitops.h index 05a0d374404b..94b52c8ce97f 100644 --- a/include/asm-x86_64/bitops.h +++ b/include/asm-x86_64/bitops.h @@ -409,6 +409,7 @@ static __inline__ int ffs(int x) /* find last set bit */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #endif /* __KERNEL__ */ diff --git a/include/asm-xtensa/bitops.h b/include/asm-xtensa/bitops.h index e76ee889e21d..0a2065f1a372 100644 --- a/include/asm-xtensa/bitops.h +++ b/include/asm-xtensa/bitops.h @@ -245,6 +245,7 @@ static __inline__ int fls (unsigned int x) { return __cntlz(x); } +#define fls64(x) generic_fls64(x) static __inline__ int find_next_bit(const unsigned long *addr, int size, int offset) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 38c2fb7ebe09..6a2a19f14bb2 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -76,6 +76,15 @@ static __inline__ int generic_fls(int x) */ #include + +static inline int generic_fls64(__u64 x) +{ + __u32 h = x >> 32; + if (h) + return fls(x) + 32; + return fls(x); +} + static __inline__ int get_bitmask_order(unsigned int count) { int order; -- cgit v1.2.3-70-g09d2 From 90933fc8ba5cc9034e3c04ee19938a22b0b4fe4e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 21 Dec 2005 19:31:36 -0800 Subject: [FLS64]: x86_64 version Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/asm-x86_64/bitops.h | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/bitops.h b/include/asm-x86_64/bitops.h index 94b52c8ce97f..a4d5d0909453 100644 --- a/include/asm-x86_64/bitops.h +++ b/include/asm-x86_64/bitops.h @@ -340,6 +340,20 @@ static __inline__ unsigned long __ffs(unsigned long word) return word; } +/* + * __fls: find last bit set. + * @word: The word to search + * + * Undefined if no zero exists, so code should check against ~0UL first. + */ +static __inline__ unsigned long __fls(unsigned long word) +{ + __asm__("bsrq %1,%0" + :"=r" (word) + :"rm" (word)); + return word; +} + #ifdef __KERNEL__ static inline int sched_find_first_bit(const unsigned long *b) @@ -369,6 +383,19 @@ static __inline__ int ffs(int x) return r+1; } +/** + * fls64 - find last bit set in 64 bit word + * @x: the word to search + * + * This is defined the same way as fls. + */ +static __inline__ int fls64(__u64 x) +{ + if (x == 0) + return 0; + return __fls(x) + 1; +} + /** * hweightN - returns the hamming weight of a N-bit word * @x: the word to weigh @@ -409,7 +436,6 @@ static __inline__ int ffs(int x) /* find last set bit */ #define fls(x) generic_fls(x) -#define fls64(x) generic_fls64(x) #endif /* __KERNEL__ */ -- cgit v1.2.3-70-g09d2 From 52ccb8e90c0ace233b8b740f2fc5de0dbd706b27 Mon Sep 17 00:00:00 2001 From: Frank Filz Date: Thu, 22 Dec 2005 11:36:46 -0800 Subject: [SCTP]: Update SCTP_PEER_ADDR_PARAMS socket option to the latest api draft. This patch adds support to set/get heartbeat interval, maximum number of retransmissions, pathmtu, sackdelay time for a particular transport/ association/socket as per the latest SCTP sockets api draft11. Signed-off-by: Frank Filz Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 76 +++++-- include/net/sctp/user.h | 16 ++ net/sctp/associola.c | 81 ++++--- net/sctp/input.c | 36 +++- net/sctp/output.c | 17 +- net/sctp/sm_sideeffect.c | 26 ++- net/sctp/sm_statefuns.c | 20 +- net/sctp/socket.c | 511 ++++++++++++++++++++++++++++++++++----------- net/sctp/transport.c | 32 +-- 9 files changed, 595 insertions(+), 220 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8e7794ee27ff..f5c22d77feab 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -277,6 +277,24 @@ struct sctp_sock { __u32 default_context; __u32 default_timetolive; + /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to + * the destination address every heartbeat interval. This value + * will be inherited by all new associations. + */ + __u32 hbinterval; + + /* This is the max_retrans value for new associations. */ + __u16 pathmaxrxt; + + /* The initial Path MTU to use for new associations. */ + __u32 pathmtu; + + /* The default SACK delay timeout for new associations. */ + __u32 sackdelay; + + /* Flags controling Heartbeat, SACK delay, and Path MTU Discovery. */ + __u32 param_flags; + struct sctp_initmsg initmsg; struct sctp_rtoinfo rtoinfo; struct sctp_paddrparams paddrparam; @@ -845,9 +863,6 @@ struct sctp_transport { /* Data that has been sent, but not acknowledged. */ __u32 flight_size; - /* PMTU : The current known path MTU. */ - __u32 pmtu; - /* Destination */ struct dst_entry *dst; /* Source address. */ @@ -862,7 +877,22 @@ struct sctp_transport { /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to * the destination address every heartbeat interval. */ - int hb_interval; + __u32 hbinterval; + + /* This is the max_retrans value for the transport and will + * be initialized from the assocs value. This can be changed + * using SCTP_SET_PEER_ADDR_PARAMS socket option. + */ + __u16 pathmaxrxt; + + /* PMTU : The current known path MTU. */ + __u32 pathmtu; + + /* SACK delay timeout */ + __u32 sackdelay; + + /* Flags controling Heartbeat, SACK delay, and Path MTU Discovery. */ + __u32 param_flags; /* When was the last time (in jiffies) that we heard from this * transport? We use this to pick new active and retran paths. @@ -882,22 +912,11 @@ struct sctp_transport { */ int state; - /* hb_allowed : The current heartbeat state of this destination, - * : i.e. ALLOW-HB, NO-HEARTBEAT, etc. - */ - int hb_allowed; - /* These are the error stats for this destination. */ /* Error count : The current error count for this destination. */ unsigned short error_count; - /* This is the max_retrans value for the transport and will - * be initialized to proto.max_retrans.path. This can be changed - * using SCTP_SET_PEER_ADDR_PARAMS socket option. - */ - int max_retrans; - /* Per : A timer used by each destination. * Destination : * Timer : @@ -1502,6 +1521,28 @@ struct sctp_association { /* The largest timeout or RTO value to use in attempting an INIT */ __u16 max_init_timeo; + /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to + * the destination address every heartbeat interval. This value + * will be inherited by all new transports. + */ + __u32 hbinterval; + + /* This is the max_retrans value for new transports in the + * association. + */ + __u16 pathmaxrxt; + + /* Association : The smallest PMTU discovered for all of the + * PMTU : peer's transport addresses. + */ + __u32 pathmtu; + + /* SACK delay timeout */ + __u32 sackdelay; + + /* Flags controling Heartbeat, SACK delay, and Path MTU Discovery. */ + __u32 param_flags; + int timeouts[SCTP_NUM_TIMEOUT_TYPES]; struct timer_list timers[SCTP_NUM_TIMEOUT_TYPES]; @@ -1571,11 +1612,6 @@ struct sctp_association { */ wait_queue_head_t wait; - /* Association : The smallest PMTU discovered for all of the - * PMTU : peer's transport addresses. - */ - __u32 pmtu; - /* The message size at which SCTP fragmentation will occur. */ __u32 frag_point; diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index f1c3bc54526a..b9052864fa5a 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -503,11 +503,27 @@ struct sctp_setadaption { * unreachable. The following structure is used to access and modify an * address's parameters: */ +enum sctp_spp_flags { + SPP_HB_ENABLE = 1, /*Enable heartbeats*/ + SPP_HB_DISABLE = 2, /*Disable heartbeats*/ + SPP_HB = SPP_HB_ENABLE | SPP_HB_DISABLE, + SPP_HB_DEMAND = 4, /*Send heartbeat immediately*/ + SPP_PMTUD_ENABLE = 8, /*Enable PMTU discovery*/ + SPP_PMTUD_DISABLE = 16, /*Disable PMTU discovery*/ + SPP_PMTUD = SPP_PMTUD_ENABLE | SPP_PMTUD_DISABLE, + SPP_SACKDELAY_ENABLE = 32, /*Enable SACK*/ + SPP_SACKDELAY_DISABLE = 64, /*Disable SACK*/ + SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE, +}; + struct sctp_paddrparams { sctp_assoc_t spp_assoc_id; struct sockaddr_storage spp_address; __u32 spp_hbinterval; __u16 spp_pathmaxrxt; + __u32 spp_pathmtu; + __u32 spp_sackdelay; + __u32 spp_flags; } __attribute__((packed, aligned(4))); /* diff --git a/net/sctp/associola.c b/net/sctp/associola.c index dec68a604773..9d05e13e92f6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -110,7 +110,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->cookie_life.tv_sec = sp->assocparams.sasoc_cookie_life / 1000; asoc->cookie_life.tv_usec = (sp->assocparams.sasoc_cookie_life % 1000) * 1000; - asoc->pmtu = 0; asoc->frag_point = 0; /* Set the association max_retrans and RTO values from the @@ -123,6 +122,25 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->overall_error_count = 0; + /* Initialize the association's heartbeat interval based on the + * sock configured value. + */ + asoc->hbinterval = msecs_to_jiffies(sp->hbinterval); + + /* Initialize path max retrans value. */ + asoc->pathmaxrxt = sp->pathmaxrxt; + + /* Initialize default path MTU. */ + asoc->pathmtu = sp->pathmtu; + + /* Set association default SACK delay */ + asoc->sackdelay = msecs_to_jiffies(sp->sackdelay); + + /* Set the association default flags controlling + * Heartbeat, SACK delay, and Path MTU Discovery. + */ + asoc->param_flags = sp->param_flags; + /* Initialize the maximum mumber of new data packets that can be sent * in a burst. */ @@ -144,8 +162,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a = 5 * asoc->rto_max; asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; - asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = - SCTP_DEFAULT_TIMEOUT_SACK; + asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sp->autoclose * HZ; @@ -540,23 +557,46 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, sctp_transport_set_owner(peer, asoc); + /* Initialize the peer's heartbeat interval based on the + * association configured value. + */ + peer->hbinterval = asoc->hbinterval; + + /* Set the path max_retrans. */ + peer->pathmaxrxt = asoc->pathmaxrxt; + + /* Initialize the peer's SACK delay timeout based on the + * association configured value. + */ + peer->sackdelay = asoc->sackdelay; + + /* Enable/disable heartbeat, SACK delay, and path MTU discovery + * based on association setting. + */ + peer->param_flags = asoc->param_flags; + /* Initialize the pmtu of the transport. */ - sctp_transport_pmtu(peer); + if (peer->param_flags & SPP_PMTUD_ENABLE) + sctp_transport_pmtu(peer); + else if (asoc->pathmtu) + peer->pathmtu = asoc->pathmtu; + else + peer->pathmtu = SCTP_DEFAULT_MAXSEGMENT; /* If this is the first transport addr on this association, * initialize the association PMTU to the peer's PMTU. * If not and the current association PMTU is higher than the new * peer's PMTU, reset the association PMTU to the new peer's PMTU. */ - if (asoc->pmtu) - asoc->pmtu = min_t(int, peer->pmtu, asoc->pmtu); + if (asoc->pathmtu) + asoc->pathmtu = min_t(int, peer->pathmtu, asoc->pathmtu); else - asoc->pmtu = peer->pmtu; + asoc->pathmtu = peer->pathmtu; SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to " - "%d\n", asoc, asoc->pmtu); + "%d\n", asoc, asoc->pathmtu); - asoc->frag_point = sctp_frag_point(sp, asoc->pmtu); + asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); /* The asoc->peer.port might not be meaningful yet, but * initialize the packet structure anyway. @@ -574,7 +614,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, * (for example, implementations MAY use the size of the * receiver advertised window). */ - peer->cwnd = min(4*asoc->pmtu, max_t(__u32, 2*asoc->pmtu, 4380)); + peer->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); /* At this point, we may not have the receiver's advertised window, * so initialize ssthresh to the default value and it will be set @@ -585,17 +625,6 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, peer->partial_bytes_acked = 0; peer->flight_size = 0; - /* By default, enable heartbeat for peer address. */ - peer->hb_allowed = 1; - - /* Initialize the peer's heartbeat interval based on the - * sock configured value. - */ - peer->hb_interval = msecs_to_jiffies(sp->paddrparam.spp_hbinterval); - - /* Set the path max_retrans. */ - peer->max_retrans = sp->paddrparam.spp_pathmaxrxt; - /* Set the transport's RTO.initial value */ peer->rto = asoc->rto_initial; @@ -1155,18 +1184,18 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) /* Get the lowest pmtu of all the transports. */ list_for_each(pos, &asoc->peer.transport_addr_list) { t = list_entry(pos, struct sctp_transport, transports); - if (!pmtu || (t->pmtu < pmtu)) - pmtu = t->pmtu; + if (!pmtu || (t->pathmtu < pmtu)) + pmtu = t->pathmtu; } if (pmtu) { struct sctp_sock *sp = sctp_sk(asoc->base.sk); - asoc->pmtu = pmtu; + asoc->pathmtu = pmtu; asoc->frag_point = sctp_frag_point(sp, pmtu); } SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n", - __FUNCTION__, asoc, asoc->pmtu, asoc->frag_point); + __FUNCTION__, asoc, asoc->pathmtu, asoc->frag_point); } /* Should we send a SACK to update our peer? */ @@ -1179,7 +1208,7 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) case SCTP_STATE_SHUTDOWN_SENT: if ((asoc->rwnd > asoc->a_rwnd) && ((asoc->rwnd - asoc->a_rwnd) >= - min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pmtu))) + min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pathmtu))) return 1; break; default: diff --git a/net/sctp/input.c b/net/sctp/input.c index b24ff2c1aef5..238f1bffa684 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -305,18 +305,36 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t, __u32 pmtu) { - if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { - printk(KERN_WARNING "%s: Reported pmtu %d too low, " - "using default minimum of %d\n", __FUNCTION__, pmtu, - SCTP_DEFAULT_MINSEGMENT); - pmtu = SCTP_DEFAULT_MINSEGMENT; - } + if (sock_owned_by_user(sk) || !t || (t->pathmtu == pmtu)) + return; - if (!sock_owned_by_user(sk) && t && (t->pmtu != pmtu)) { - t->pmtu = pmtu; + if (t->param_flags & SPP_PMTUD_ENABLE) { + if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { + printk(KERN_WARNING "%s: Reported pmtu %d too low, " + "using default minimum of %d\n", + __FUNCTION__, pmtu, + SCTP_DEFAULT_MINSEGMENT); + /* Use default minimum segment size and disable + * pmtu discovery on this transport. + */ + t->pathmtu = SCTP_DEFAULT_MINSEGMENT; + t->param_flags = (t->param_flags & ~SPP_HB) | + SPP_PMTUD_DISABLE; + } else { + t->pathmtu = pmtu; + } + + /* Update association pmtu. */ sctp_assoc_sync_pmtu(asoc); - sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } + + /* Retransmit with the new pmtu setting. + * Normally, if PMTU discovery is disabled, an ICMP Fragmentation + * Needed will never be sent, but if a message was sent before + * PMTU discovery was disabled that was larger than the PMTU, it + * would not be fragmented, so it must be re-transmitted fragmented. + */ + sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } /* diff --git a/net/sctp/output.c b/net/sctp/output.c index 931371633464..a40991ef72c9 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -234,8 +234,8 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, goto finish; pmtu = ((packet->transport->asoc) ? - (packet->transport->asoc->pmtu) : - (packet->transport->pmtu)); + (packet->transport->asoc->pathmtu) : + (packet->transport->pathmtu)); too_big = (psize + chunk_len > pmtu); @@ -482,7 +482,9 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (!dst || (dst->obsolete > 1)) { dst_release(dst); sctp_transport_route(tp, NULL, sctp_sk(sk)); - sctp_assoc_sync_pmtu(asoc); + if (asoc->param_flags & SPP_PMTUD_ENABLE) { + sctp_assoc_sync_pmtu(asoc); + } } nskb->dst = dst_clone(tp->dst); @@ -492,7 +494,10 @@ int sctp_packet_transmit(struct sctp_packet *packet) SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n", nskb->len); - (*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok); + if (tp->param_flags & SPP_PMTUD_ENABLE) + (*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok); + else + (*tp->af_specific->sctp_xmit)(nskb, tp, 1); out: packet->size = packet->overhead; @@ -577,7 +582,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * if ((flightsize + Max.Burst * MTU) < cwnd) * cwnd = flightsize + Max.Burst * MTU */ - max_burst_bytes = asoc->max_burst * asoc->pmtu; + max_burst_bytes = asoc->max_burst * asoc->pathmtu; if ((transport->flight_size + max_burst_bytes) < transport->cwnd) { transport->cwnd = transport->flight_size + max_burst_bytes; SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: " @@ -622,7 +627,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * data will fit or delay in hopes of bundling a full * sized packet. */ - if (len < asoc->pmtu - packet->overhead) { + if (len < asoc->pathmtu - packet->overhead) { retval = SCTP_XMIT_NAGLE_DELAY; goto finish; } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 823947170a33..2d7d8a5db2ac 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -157,9 +157,12 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, { __u32 ctsn, max_tsn_seen; struct sctp_chunk *sack; + struct sctp_transport *trans = asoc->peer.last_data_from; int error = 0; - if (force) + if (force || + (!trans && (asoc->param_flags & SPP_SACKDELAY_DISABLE)) || + (trans && (trans->param_flags & SPP_SACKDELAY_DISABLE))) asoc->peer.sack_needed = 1; ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); @@ -189,7 +192,22 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, if (!asoc->peer.sack_needed) { /* We will need a SACK for the next packet. */ asoc->peer.sack_needed = 1; - goto out; + + /* Set the SACK delay timeout based on the + * SACK delay for the last transport + * data was received from, or the default + * for the association. + */ + if (trans) + asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = + trans->sackdelay; + else + asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = + asoc->sackdelay; + + /* Restart the SACK timer. */ + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, + SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } else { if (asoc->a_rwnd > asoc->rwnd) asoc->a_rwnd = asoc->rwnd; @@ -205,7 +223,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } -out: + return error; nomem: error = -ENOMEM; @@ -415,7 +433,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, asoc->overall_error_count++; if (transport->state != SCTP_INACTIVE && - (transport->error_count++ >= transport->max_retrans)) { + (transport->error_count++ >= transport->pathmaxrxt)) { SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p", " transport IP: port:%d failed.\n", asoc, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 475bfb4972d9..557a7d90b92a 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -900,7 +900,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, * HEARTBEAT is sent (see Section 8.3). */ - if (transport->hb_allowed) { + if (transport->param_flags & SPP_HB_ENABLE) { if (SCTP_DISPOSITION_NOMEM == sctp_sf_heartbeat(ep, asoc, type, arg, commands)) @@ -1051,7 +1051,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, return SCTP_DISPOSITION_DISCARD; } - max_interval = link->hb_interval + link->rto; + max_interval = link->hbinterval + link->rto; /* Check if the timestamp looks valid. */ if (time_after(hbinfo->sent_at, jiffies) || @@ -2691,14 +2691,9 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, * document allow. However, an SCTP transmitter MUST NOT be * more aggressive than the following algorithms allow. */ - if (chunk->end_of_packet) { + if (chunk->end_of_packet) sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); - /* Start the SACK timer. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, - SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); - } - return SCTP_DISPOSITION_CONSUME; discard_force: @@ -2721,13 +2716,9 @@ discard_force: return SCTP_DISPOSITION_DISCARD; discard_noforce: - if (chunk->end_of_packet) { + if (chunk->end_of_packet) sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); - /* Start the SACK timer. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, - SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); - } return SCTP_DISPOSITION_DISCARD; consume: return SCTP_DISPOSITION_CONSUME; @@ -3442,9 +3433,6 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep, * send another. */ sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); - /* Start the SACK timer. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, - SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); return SCTP_DISPOSITION_CONSUME; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9df888e932c5..adb5ee62c2a6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1941,106 +1941,275 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, * address's parameters: * * struct sctp_paddrparams { - * sctp_assoc_t spp_assoc_id; - * struct sockaddr_storage spp_address; - * uint32_t spp_hbinterval; - * uint16_t spp_pathmaxrxt; - * }; - * - * spp_assoc_id - (UDP style socket) This is filled in the application, - * and identifies the association for this query. + * sctp_assoc_t spp_assoc_id; + * struct sockaddr_storage spp_address; + * uint32_t spp_hbinterval; + * uint16_t spp_pathmaxrxt; + * uint32_t spp_pathmtu; + * uint32_t spp_sackdelay; + * uint32_t spp_flags; + * }; + * + * spp_assoc_id - (one-to-many style socket) This is filled in the + * application, and identifies the association for + * this query. * spp_address - This specifies which address is of interest. * spp_hbinterval - This contains the value of the heartbeat interval, - * in milliseconds. A value of 0, when modifying the - * parameter, specifies that the heartbeat on this - * address should be disabled. A value of UINT32_MAX - * (4294967295), when modifying the parameter, - * specifies that a heartbeat should be sent - * immediately to the peer address, and the current - * interval should remain unchanged. + * in milliseconds. If a value of zero + * is present in this field then no changes are to + * be made to this parameter. * spp_pathmaxrxt - This contains the maximum number of * retransmissions before this address shall be - * considered unreachable. + * considered unreachable. If a value of zero + * is present in this field then no changes are to + * be made to this parameter. + * spp_pathmtu - When Path MTU discovery is disabled the value + * specified here will be the "fixed" path mtu. + * Note that if the spp_address field is empty + * then all associations on this address will + * have this fixed path mtu set upon them. + * + * spp_sackdelay - When delayed sack is enabled, this value specifies + * the number of milliseconds that sacks will be delayed + * for. This value will apply to all addresses of an + * association if the spp_address field is empty. Note + * also, that if delayed sack is enabled and this + * value is set to 0, no change is made to the last + * recorded delayed sack timer value. + * + * spp_flags - These flags are used to control various features + * on an association. The flag field may contain + * zero or more of the following options. + * + * SPP_HB_ENABLE - Enable heartbeats on the + * specified address. Note that if the address + * field is empty all addresses for the association + * have heartbeats enabled upon them. + * + * SPP_HB_DISABLE - Disable heartbeats on the + * speicifed address. Note that if the address + * field is empty all addresses for the association + * will have their heartbeats disabled. Note also + * that SPP_HB_ENABLE and SPP_HB_DISABLE are + * mutually exclusive, only one of these two should + * be specified. Enabling both fields will have + * undetermined results. + * + * SPP_HB_DEMAND - Request a user initiated heartbeat + * to be made immediately. + * + * SPP_PMTUD_ENABLE - This field will enable PMTU + * discovery upon the specified address. Note that + * if the address feild is empty then all addresses + * on the association are effected. + * + * SPP_PMTUD_DISABLE - This field will disable PMTU + * discovery upon the specified address. Note that + * if the address feild is empty then all addresses + * on the association are effected. Not also that + * SPP_PMTUD_ENABLE and SPP_PMTUD_DISABLE are mutually + * exclusive. Enabling both will have undetermined + * results. + * + * SPP_SACKDELAY_ENABLE - Setting this flag turns + * on delayed sack. The time specified in spp_sackdelay + * is used to specify the sack delay for this address. Note + * that if spp_address is empty then all addresses will + * enable delayed sack and take on the sack delay + * value specified in spp_sackdelay. + * SPP_SACKDELAY_DISABLE - Setting this flag turns + * off delayed sack. If the spp_address field is blank then + * delayed sack is disabled for the entire association. Note + * also that this field is mutually exclusive to + * SPP_SACKDELAY_ENABLE, setting both will have undefined + * results. */ +int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, + struct sctp_transport *trans, + struct sctp_association *asoc, + struct sctp_sock *sp, + int hb_change, + int pmtud_change, + int sackdelay_change) +{ + int error; + + if (params->spp_flags & SPP_HB_DEMAND && trans) { + error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans); + if (error) + return error; + } + + if (params->spp_hbinterval) { + if (trans) { + trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval); + } else if (asoc) { + asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval); + } else { + sp->hbinterval = params->spp_hbinterval; + } + } + + if (hb_change) { + if (trans) { + trans->param_flags = + (trans->param_flags & ~SPP_HB) | hb_change; + } else if (asoc) { + asoc->param_flags = + (asoc->param_flags & ~SPP_HB) | hb_change; + } else { + sp->param_flags = + (sp->param_flags & ~SPP_HB) | hb_change; + } + } + + if (params->spp_pathmtu) { + if (trans) { + trans->pathmtu = params->spp_pathmtu; + sctp_assoc_sync_pmtu(asoc); + } else if (asoc) { + asoc->pathmtu = params->spp_pathmtu; + sctp_frag_point(sp, params->spp_pathmtu); + } else { + sp->pathmtu = params->spp_pathmtu; + } + } + + if (pmtud_change) { + if (trans) { + int update = (trans->param_flags & SPP_PMTUD_DISABLE) && + (params->spp_flags & SPP_PMTUD_ENABLE); + trans->param_flags = + (trans->param_flags & ~SPP_PMTUD) | pmtud_change; + if (update) { + sctp_transport_pmtu(trans); + sctp_assoc_sync_pmtu(asoc); + } + } else if (asoc) { + asoc->param_flags = + (asoc->param_flags & ~SPP_PMTUD) | pmtud_change; + } else { + sp->param_flags = + (sp->param_flags & ~SPP_PMTUD) | pmtud_change; + } + } + + if (params->spp_sackdelay) { + if (trans) { + trans->sackdelay = + msecs_to_jiffies(params->spp_sackdelay); + } else if (asoc) { + asoc->sackdelay = + msecs_to_jiffies(params->spp_sackdelay); + } else { + sp->sackdelay = params->spp_sackdelay; + } + } + + if (sackdelay_change) { + if (trans) { + trans->param_flags = + (trans->param_flags & ~SPP_SACKDELAY) | + sackdelay_change; + } else if (asoc) { + asoc->param_flags = + (asoc->param_flags & ~SPP_SACKDELAY) | + sackdelay_change; + } else { + sp->param_flags = + (sp->param_flags & ~SPP_SACKDELAY) | + sackdelay_change; + } + } + + if (params->spp_pathmaxrxt) { + if (trans) { + trans->pathmaxrxt = params->spp_pathmaxrxt; + } else if (asoc) { + asoc->pathmaxrxt = params->spp_pathmaxrxt; + } else { + sp->pathmaxrxt = params->spp_pathmaxrxt; + } + } + + return 0; +} + static int sctp_setsockopt_peer_addr_params(struct sock *sk, char __user *optval, int optlen) { - struct sctp_paddrparams params; - struct sctp_transport *trans; + struct sctp_paddrparams params; + struct sctp_transport *trans = NULL; + struct sctp_association *asoc = NULL; + struct sctp_sock *sp = sctp_sk(sk); int error; + int hb_change, pmtud_change, sackdelay_change; if (optlen != sizeof(struct sctp_paddrparams)) - return -EINVAL; + return - EINVAL; + if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; - /* - * API 7. Socket Options (setting the default value for the endpoint) - * All options that support specific settings on an association by - * filling in either an association id variable or a sockaddr_storage - * SHOULD also support setting of the same value for the entire endpoint - * (i.e. future associations). To accomplish this the following logic is - * used when setting one of these options: - - * c) If neither the sockaddr_storage or association identification is - * set i.e. the sockaddr_storage is set to all 0's (INADDR_ANY) and - * the association identification is 0, the settings are a default - * and to be applied to the endpoint (all future associations). - */ + /* Validate flags and value parameters. */ + hb_change = params.spp_flags & SPP_HB; + pmtud_change = params.spp_flags & SPP_PMTUD; + sackdelay_change = params.spp_flags & SPP_SACKDELAY; + + if (hb_change == SPP_HB || + pmtud_change == SPP_PMTUD || + sackdelay_change == SPP_SACKDELAY || + params.spp_sackdelay > 500 || + (params.spp_pathmtu + && params.spp_pathmtu < SCTP_DEFAULT_MINSEGMENT)) + return -EINVAL; - /* update default value for endpoint (all future associations) */ - if (!params.spp_assoc_id && - sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { - /* Manual heartbeat on an endpoint is invalid. */ - if (0xffffffff == params.spp_hbinterval) + /* If an address other than INADDR_ANY is specified, and + * no transport is found, then the request is invalid. + */ + if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { + trans = sctp_addr_id2transport(sk, ¶ms.spp_address, + params.spp_assoc_id); + if (!trans) return -EINVAL; - else if (params.spp_hbinterval) - sctp_sk(sk)->paddrparam.spp_hbinterval = - params.spp_hbinterval; - if (params.spp_pathmaxrxt) - sctp_sk(sk)->paddrparam.spp_pathmaxrxt = - params.spp_pathmaxrxt; - return 0; } - trans = sctp_addr_id2transport(sk, ¶ms.spp_address, - params.spp_assoc_id); - if (!trans) + /* Get association, if assoc_id != 0 and the socket is a one + * to many style socket, and an association was not found, then + * the id was invalid. + */ + asoc = sctp_id2assoc(sk, params.spp_assoc_id); + if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - /* Applications can enable or disable heartbeats for any peer address - * of an association, modify an address's heartbeat interval, force a - * heartbeat to be sent immediately, and adjust the address's maximum - * number of retransmissions sent before an address is considered - * unreachable. - * - * The value of the heartbeat interval, in milliseconds. A value of - * UINT32_MAX (4294967295), when modifying the parameter, specifies - * that a heartbeat should be sent immediately to the peer address, - * and the current interval should remain unchanged. - */ - if (0xffffffff == params.spp_hbinterval) { - error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans); - if (error) - return error; - } else { - /* The value of the heartbeat interval, in milliseconds. A value of 0, - * when modifying the parameter, specifies that the heartbeat on this - * address should be disabled. + /* Heartbeat demand can only be sent on a transport or + * association, but not a socket. */ - if (params.spp_hbinterval) { - trans->hb_allowed = 1; - trans->hb_interval = - msecs_to_jiffies(params.spp_hbinterval); - } else - trans->hb_allowed = 0; - } + if (params.spp_flags & SPP_HB_DEMAND && !trans && !asoc) + return -EINVAL; + + /* Process parameters. */ + error = sctp_apply_peer_addr_params(¶ms, trans, asoc, sp, + hb_change, pmtud_change, + sackdelay_change); - /* spp_pathmaxrxt contains the maximum number of retransmissions - * before this address shall be considered unreachable. + if (error) + return error; + + /* If changes are for association, also apply parameters to each + * transport. */ - if (params.spp_pathmaxrxt) - trans->max_retrans = params.spp_pathmaxrxt; + if (!trans && asoc) { + struct list_head *pos; + + list_for_each(pos, &asoc->peer.transport_addr_list) { + trans = list_entry(pos, struct sctp_transport, + transports); + sctp_apply_peer_addr_params(¶ms, trans, asoc, sp, + hb_change, pmtud_change, + sackdelay_change); + } + } return 0; } @@ -2334,7 +2503,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optl /* Update the frag_point of the existing associations. */ list_for_each(pos, &(sp->ep->asocs)) { asoc = list_entry(pos, struct sctp_association, asocs); - asoc->frag_point = sctp_frag_point(sp, asoc->pmtu); + asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); } return 0; @@ -2715,8 +2884,13 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) /* Default Peer Address Parameters. These defaults can * be modified via SCTP_PEER_ADDR_PARAMS */ - sp->paddrparam.spp_hbinterval = jiffies_to_msecs(sctp_hb_interval); - sp->paddrparam.spp_pathmaxrxt = sctp_max_retrans_path; + sp->hbinterval = jiffies_to_msecs(sctp_hb_interval); + sp->pathmaxrxt = sctp_max_retrans_path; + sp->pathmtu = 0; // allow default discovery + sp->sackdelay = sctp_sack_timeout; + sp->param_flags = SPP_HB_ENABLE | + SPP_PMTUD_ENABLE | + SPP_SACKDELAY_ENABLE; /* If enabled no SCTP message fragmentation will be performed. * Configure through SCTP_DISABLE_FRAGMENTS socket option. @@ -2865,7 +3039,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, status.sstat_primary.spinfo_cwnd = transport->cwnd; status.sstat_primary.spinfo_srtt = transport->srtt; status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto); - status.sstat_primary.spinfo_mtu = transport->pmtu; + status.sstat_primary.spinfo_mtu = transport->pathmtu; if (status.sstat_primary.spinfo_state == SCTP_UNKNOWN) status.sstat_primary.spinfo_state = SCTP_ACTIVE; @@ -2924,7 +3098,7 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len, pinfo.spinfo_cwnd = transport->cwnd; pinfo.spinfo_srtt = transport->srtt; pinfo.spinfo_rto = jiffies_to_msecs(transport->rto); - pinfo.spinfo_mtu = transport->pmtu; + pinfo.spinfo_mtu = transport->pathmtu; if (pinfo.spinfo_state == SCTP_UNKNOWN) pinfo.spinfo_state = SCTP_ACTIVE; @@ -3086,69 +3260,154 @@ out: * address's parameters: * * struct sctp_paddrparams { - * sctp_assoc_t spp_assoc_id; - * struct sockaddr_storage spp_address; - * uint32_t spp_hbinterval; - * uint16_t spp_pathmaxrxt; - * }; - * - * spp_assoc_id - (UDP style socket) This is filled in the application, - * and identifies the association for this query. + * sctp_assoc_t spp_assoc_id; + * struct sockaddr_storage spp_address; + * uint32_t spp_hbinterval; + * uint16_t spp_pathmaxrxt; + * uint32_t spp_pathmtu; + * uint32_t spp_sackdelay; + * uint32_t spp_flags; + * }; + * + * spp_assoc_id - (one-to-many style socket) This is filled in the + * application, and identifies the association for + * this query. * spp_address - This specifies which address is of interest. * spp_hbinterval - This contains the value of the heartbeat interval, - * in milliseconds. A value of 0, when modifying the - * parameter, specifies that the heartbeat on this - * address should be disabled. A value of UINT32_MAX - * (4294967295), when modifying the parameter, - * specifies that a heartbeat should be sent - * immediately to the peer address, and the current - * interval should remain unchanged. + * in milliseconds. If a value of zero + * is present in this field then no changes are to + * be made to this parameter. * spp_pathmaxrxt - This contains the maximum number of * retransmissions before this address shall be - * considered unreachable. + * considered unreachable. If a value of zero + * is present in this field then no changes are to + * be made to this parameter. + * spp_pathmtu - When Path MTU discovery is disabled the value + * specified here will be the "fixed" path mtu. + * Note that if the spp_address field is empty + * then all associations on this address will + * have this fixed path mtu set upon them. + * + * spp_sackdelay - When delayed sack is enabled, this value specifies + * the number of milliseconds that sacks will be delayed + * for. This value will apply to all addresses of an + * association if the spp_address field is empty. Note + * also, that if delayed sack is enabled and this + * value is set to 0, no change is made to the last + * recorded delayed sack timer value. + * + * spp_flags - These flags are used to control various features + * on an association. The flag field may contain + * zero or more of the following options. + * + * SPP_HB_ENABLE - Enable heartbeats on the + * specified address. Note that if the address + * field is empty all addresses for the association + * have heartbeats enabled upon them. + * + * SPP_HB_DISABLE - Disable heartbeats on the + * speicifed address. Note that if the address + * field is empty all addresses for the association + * will have their heartbeats disabled. Note also + * that SPP_HB_ENABLE and SPP_HB_DISABLE are + * mutually exclusive, only one of these two should + * be specified. Enabling both fields will have + * undetermined results. + * + * SPP_HB_DEMAND - Request a user initiated heartbeat + * to be made immediately. + * + * SPP_PMTUD_ENABLE - This field will enable PMTU + * discovery upon the specified address. Note that + * if the address feild is empty then all addresses + * on the association are effected. + * + * SPP_PMTUD_DISABLE - This field will disable PMTU + * discovery upon the specified address. Note that + * if the address feild is empty then all addresses + * on the association are effected. Not also that + * SPP_PMTUD_ENABLE and SPP_PMTUD_DISABLE are mutually + * exclusive. Enabling both will have undetermined + * results. + * + * SPP_SACKDELAY_ENABLE - Setting this flag turns + * on delayed sack. The time specified in spp_sackdelay + * is used to specify the sack delay for this address. Note + * that if spp_address is empty then all addresses will + * enable delayed sack and take on the sack delay + * value specified in spp_sackdelay. + * SPP_SACKDELAY_DISABLE - Setting this flag turns + * off delayed sack. If the spp_address field is blank then + * delayed sack is disabled for the entire association. Note + * also that this field is mutually exclusive to + * SPP_SACKDELAY_ENABLE, setting both will have undefined + * results. */ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen) { - struct sctp_paddrparams params; - struct sctp_transport *trans; + struct sctp_paddrparams params; + struct sctp_transport *trans = NULL; + struct sctp_association *asoc = NULL; + struct sctp_sock *sp = sctp_sk(sk); if (len != sizeof(struct sctp_paddrparams)) return -EINVAL; + if (copy_from_user(¶ms, optval, len)) return -EFAULT; - /* If no association id is specified retrieve the default value - * for the endpoint that will be used for all future associations + /* If an address other than INADDR_ANY is specified, and + * no transport is found, then the request is invalid. */ - if (!params.spp_assoc_id && - sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { - params.spp_hbinterval = sctp_sk(sk)->paddrparam.spp_hbinterval; - params.spp_pathmaxrxt = sctp_sk(sk)->paddrparam.spp_pathmaxrxt; - - goto done; + if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { + trans = sctp_addr_id2transport(sk, ¶ms.spp_address, + params.spp_assoc_id); + if (!trans) { + SCTP_DEBUG_PRINTK("Failed no transport\n"); + return -EINVAL; + } } - trans = sctp_addr_id2transport(sk, ¶ms.spp_address, - params.spp_assoc_id); - if (!trans) - return -EINVAL; - - /* The value of the heartbeat interval, in milliseconds. A value of 0, - * when modifying the parameter, specifies that the heartbeat on this - * address should be disabled. + /* Get association, if assoc_id != 0 and the socket is a one + * to many style socket, and an association was not found, then + * the id was invalid. */ - if (!trans->hb_allowed) - params.spp_hbinterval = 0; - else - params.spp_hbinterval = jiffies_to_msecs(trans->hb_interval); + asoc = sctp_id2assoc(sk, params.spp_assoc_id); + if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) { + SCTP_DEBUG_PRINTK("Failed no association\n"); + return -EINVAL; + } - /* spp_pathmaxrxt contains the maximum number of retransmissions - * before this address shall be considered unreachable. - */ - params.spp_pathmaxrxt = trans->max_retrans; + if (trans) { + /* Fetch transport values. */ + params.spp_hbinterval = jiffies_to_msecs(trans->hbinterval); + params.spp_pathmtu = trans->pathmtu; + params.spp_pathmaxrxt = trans->pathmaxrxt; + params.spp_sackdelay = jiffies_to_msecs(trans->sackdelay); + + /*draft-11 doesn't say what to return in spp_flags*/ + params.spp_flags = trans->param_flags; + } else if (asoc) { + /* Fetch association values. */ + params.spp_hbinterval = jiffies_to_msecs(asoc->hbinterval); + params.spp_pathmtu = asoc->pathmtu; + params.spp_pathmaxrxt = asoc->pathmaxrxt; + params.spp_sackdelay = jiffies_to_msecs(asoc->sackdelay); + + /*draft-11 doesn't say what to return in spp_flags*/ + params.spp_flags = asoc->param_flags; + } else { + /* Fetch socket values. */ + params.spp_hbinterval = sp->hbinterval; + params.spp_pathmtu = sp->pathmtu; + params.spp_sackdelay = sp->sackdelay; + params.spp_pathmaxrxt = sp->pathmaxrxt; + + /*draft-11 doesn't say what to return in spp_flags*/ + params.spp_flags = sp->param_flags; + } -done: if (copy_to_user(optval, ¶ms, len)) return -EFAULT; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 268ddaf2dc0f..68d73e2dd155 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -86,10 +86,13 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->init_sent_count = 0; peer->state = SCTP_ACTIVE; - peer->hb_allowed = 0; + peer->param_flags = SPP_HB_DISABLE | + SPP_PMTUD_ENABLE | + SPP_SACKDELAY_ENABLE; + peer->hbinterval = 0; /* Initialize the default path max_retrans. */ - peer->max_retrans = sctp_max_retrans_path; + peer->pathmaxrxt = sctp_max_retrans_path; peer->error_count = 0; INIT_LIST_HEAD(&peer->transmitted); @@ -229,10 +232,10 @@ void sctp_transport_pmtu(struct sctp_transport *transport) dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL); if (dst) { - transport->pmtu = dst_mtu(dst); + transport->pathmtu = dst_mtu(dst); dst_release(dst); } else - transport->pmtu = SCTP_DEFAULT_MAXSEGMENT; + transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } /* Caches the dst entry and source address for a transport's destination @@ -254,8 +257,11 @@ void sctp_transport_route(struct sctp_transport *transport, af->get_saddr(asoc, dst, daddr, &transport->saddr); transport->dst = dst; + if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) { + return; + } if (dst) { - transport->pmtu = dst_mtu(dst); + transport->pathmtu = dst_mtu(dst); /* Initialize sk->sk_rcv_saddr, if the transport is the * association's active path for getsockname(). @@ -264,7 +270,7 @@ void sctp_transport_route(struct sctp_transport *transport, opt->pf->af->to_sk_saddr(&transport->saddr, asoc->base.sk); } else - transport->pmtu = SCTP_DEFAULT_MAXSEGMENT; + transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } /* Hold a reference to a transport. */ @@ -369,7 +375,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, ssthresh = transport->ssthresh; pba = transport->partial_bytes_acked; - pmtu = transport->asoc->pmtu; + pmtu = transport->asoc->pathmtu; if (cwnd <= ssthresh) { /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less @@ -441,8 +447,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * partial_bytes_acked = 0 */ transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pmtu); - transport->cwnd = transport->asoc->pmtu; + 4*transport->asoc->pathmtu); + transport->cwnd = transport->asoc->pathmtu; break; case SCTP_LOWER_CWND_FAST_RTX: @@ -459,7 +465,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * partial_bytes_acked = 0 */ transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pmtu); + 4*transport->asoc->pathmtu); transport->cwnd = transport->ssthresh; break; @@ -479,7 +485,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, if ((jiffies - transport->last_time_ecne_reduced) > transport->rtt) { transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pmtu); + 4*transport->asoc->pathmtu); transport->cwnd = transport->ssthresh; transport->last_time_ecne_reduced = jiffies; } @@ -496,7 +502,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, */ if ((jiffies - transport->last_time_used) > transport->rto) transport->cwnd = max(transport->cwnd/2, - 4*transport->asoc->pmtu); + 4*transport->asoc->pathmtu); break; }; @@ -511,7 +517,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, unsigned long sctp_transport_timeout(struct sctp_transport *t) { unsigned long timeout; - timeout = t->hb_interval + t->rto + sctp_jitter(t->rto); + timeout = t->hbinterval + t->rto + sctp_jitter(t->rto); timeout += jiffies; return timeout; } -- cgit v1.2.3-70-g09d2 From 7708610b1bff4a0ba8a73733d3c7c4bda9f94b21 Mon Sep 17 00:00:00 2001 From: Frank Filz Date: Thu, 22 Dec 2005 11:37:30 -0800 Subject: [SCTP]: Add support for SCTP_DELAYED_ACK_TIME socket option. Signed-off-by: Frank Filz Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/user.h | 14 ++++ net/sctp/socket.c | 184 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 198 insertions(+) (limited to 'include') diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index b9052864fa5a..8a6bef6f91eb 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -93,6 +93,8 @@ enum sctp_optname { #define SCTP_STATUS SCTP_STATUS SCTP_GET_PEER_ADDR_INFO, #define SCTP_GET_PEER_ADDR_INFO SCTP_GET_PEER_ADDR_INFO + SCTP_DELAYED_ACK_TIME, +#define SCTP_DELAYED_ACK_TIME SCTP_DELAYED_ACK_TIME /* Internal Socket Options. Some of the sctp library functions are * implemented using these socket options. @@ -526,6 +528,18 @@ struct sctp_paddrparams { __u32 spp_flags; } __attribute__((packed, aligned(4))); +/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) + * + * This options will get or set the delayed ack timer. The time is set + * in milliseconds. If the assoc_id is 0, then this sets or gets the + * endpoints default delayed ack timer value. If the assoc_id field is + * non-zero, then the set or get effects the specified association. + */ +struct sctp_assoc_value { + sctp_assoc_t assoc_id; + uint32_t assoc_value; +}; + /* * 7.2.2 Peer Address Information * diff --git a/net/sctp/socket.c b/net/sctp/socket.c index adb5ee62c2a6..fc04d185fa33 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2214,6 +2214,109 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, return 0; } +/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) + * + * This options will get or set the delayed ack timer. The time is set + * in milliseconds. If the assoc_id is 0, then this sets or gets the + * endpoints default delayed ack timer value. If the assoc_id field is + * non-zero, then the set or get effects the specified association. + * + * struct sctp_assoc_value { + * sctp_assoc_t assoc_id; + * uint32_t assoc_value; + * }; + * + * assoc_id - This parameter, indicates which association the + * user is preforming an action upon. Note that if + * this field's value is zero then the endpoints + * default value is changed (effecting future + * associations only). + * + * assoc_value - This parameter contains the number of milliseconds + * that the user is requesting the delayed ACK timer + * be set to. Note that this value is defined in + * the standard to be between 200 and 500 milliseconds. + * + * Note: a value of zero will leave the value alone, + * but disable SACK delay. A non-zero value will also + * enable SACK delay. + */ + +static int sctp_setsockopt_delayed_ack_time(struct sock *sk, + char __user *optval, int optlen) +{ + struct sctp_assoc_value params; + struct sctp_transport *trans = NULL; + struct sctp_association *asoc = NULL; + struct sctp_sock *sp = sctp_sk(sk); + + if (optlen != sizeof(struct sctp_assoc_value)) + return - EINVAL; + + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT; + + /* Validate value parameter. */ + if (params.assoc_value > 500) + return -EINVAL; + + /* Get association, if assoc_id != 0 and the socket is a one + * to many style socket, and an association was not found, then + * the id was invalid. + */ + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + if (params.assoc_value) { + if (asoc) { + asoc->sackdelay = + msecs_to_jiffies(params.assoc_value); + asoc->param_flags = + (asoc->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_ENABLE; + } else { + sp->sackdelay = params.assoc_value; + sp->param_flags = + (sp->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_ENABLE; + } + } else { + if (asoc) { + asoc->param_flags = + (asoc->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_DISABLE; + } else { + sp->param_flags = + (sp->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_DISABLE; + } + } + + /* If change is for association, also apply to each transport. */ + if (asoc) { + struct list_head *pos; + + list_for_each(pos, &asoc->peer.transport_addr_list) { + trans = list_entry(pos, struct sctp_transport, + transports); + if (params.assoc_value) { + trans->sackdelay = + msecs_to_jiffies(params.assoc_value); + trans->param_flags = + (trans->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_ENABLE; + } else { + trans->param_flags = + (trans->param_flags & ~SPP_SACKDELAY) | + SPP_SACKDELAY_DISABLE; + } + } + } + + return 0; +} + /* 7.1.3 Initialization Parameters (SCTP_INITMSG) * * Applications can specify protocol parameters for the default association @@ -2660,6 +2763,10 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen); break; + case SCTP_DELAYED_ACK_TIME: + retval = sctp_setsockopt_delayed_ack_time(sk, optval, optlen); + break; + case SCTP_INITMSG: retval = sctp_setsockopt_initmsg(sk, optval, optlen); break; @@ -3417,6 +3524,79 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, return 0; } +/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) + * + * This options will get or set the delayed ack timer. The time is set + * in milliseconds. If the assoc_id is 0, then this sets or gets the + * endpoints default delayed ack timer value. If the assoc_id field is + * non-zero, then the set or get effects the specified association. + * + * struct sctp_assoc_value { + * sctp_assoc_t assoc_id; + * uint32_t assoc_value; + * }; + * + * assoc_id - This parameter, indicates which association the + * user is preforming an action upon. Note that if + * this field's value is zero then the endpoints + * default value is changed (effecting future + * associations only). + * + * assoc_value - This parameter contains the number of milliseconds + * that the user is requesting the delayed ACK timer + * be set to. Note that this value is defined in + * the standard to be between 200 and 500 milliseconds. + * + * Note: a value of zero will leave the value alone, + * but disable SACK delay. A non-zero value will also + * enable SACK delay. + */ +static int sctp_getsockopt_delayed_ack_time(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc = NULL; + struct sctp_sock *sp = sctp_sk(sk); + + if (len != sizeof(struct sctp_assoc_value)) + return - EINVAL; + + if (copy_from_user(¶ms, optval, len)) + return -EFAULT; + + /* Get association, if assoc_id != 0 and the socket is a one + * to many style socket, and an association was not found, then + * the id was invalid. + */ + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) { + /* Fetch association values. */ + if (asoc->param_flags & SPP_SACKDELAY_ENABLE) + params.assoc_value = jiffies_to_msecs( + asoc->sackdelay); + else + params.assoc_value = 0; + } else { + /* Fetch socket values. */ + if (sp->param_flags & SPP_SACKDELAY_ENABLE) + params.assoc_value = sp->sackdelay; + else + params.assoc_value = 0; + } + + if (copy_to_user(optval, ¶ms, len)) + return -EFAULT; + + if (put_user(len, optlen)) + return -EFAULT; + + return 0; +} + /* 7.1.3 Initialization Parameters (SCTP_INITMSG) * * Applications can specify protocol parameters for the default association @@ -4274,6 +4454,10 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_peer_addr_params(sk, len, optval, optlen); break; + case SCTP_DELAYED_ACK_TIME: + retval = sctp_getsockopt_delayed_ack_time(sk, len, optval, + optlen); + break; case SCTP_INITMSG: retval = sctp_getsockopt_initmsg(sk, len, optval, optlen); break; -- cgit v1.2.3-70-g09d2 From 77d76ea310b50a9c8ff15bd290fcb4ed4961adf2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 22 Dec 2005 12:43:42 -0800 Subject: [NET]: Small cleanup to socket initialization sock_init can be done as a core_initcall instead of calling it directly in init/main.c Also I removed an out of date #ifdef. Signed-off-by: Andi Kleen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - include/linux/socket.h | 1 - init/main.c | 4 ---- net/socket.c | 10 +++++----- 4 files changed, 5 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 97f6580ce039..971677178e0c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -32,7 +32,6 @@ #define HAVE_ALLOC_SKB /* For the drivers to know */ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ -#define SLAB_SKB /* Slabified skbuffs */ #define CHECKSUM_NONE 0 #define CHECKSUM_HW 1 diff --git a/include/linux/socket.h b/include/linux/socket.h index 1739c2d5b95b..9f4019156fd8 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -27,7 +27,6 @@ struct __kernel_sockaddr_storage { #include /* __user */ extern int sysctl_somaxconn; -extern void sock_init(void); #ifdef CONFIG_PROC_FS struct seq_file; extern void socket_seq_show(struct seq_file *seq); diff --git a/init/main.c b/init/main.c index 27f97f9b4636..54aaf561cf66 100644 --- a/init/main.c +++ b/init/main.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include @@ -614,9 +613,6 @@ static void __init do_basic_setup(void) sysctl_init(); #endif - /* Networking initialization needs a process context */ - sock_init(); - do_initcalls(); } diff --git a/net/socket.c b/net/socket.c index 3145103cdf54..98be7ef3c086 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2036,7 +2036,7 @@ int sock_unregister(int family) return 0; } -void __init sock_init(void) +static int __init sock_init(void) { /* * Initialize sock SLAB cache. @@ -2044,12 +2044,10 @@ void __init sock_init(void) sk_init(); -#ifdef SLAB_SKB /* * Initialize skbuff SLAB cache */ skb_init(); -#endif /* * Initialize the protocols module. @@ -2058,8 +2056,8 @@ void __init sock_init(void) init_inodecache(); register_filesystem(&sock_fs_type); sock_mnt = kern_mount(&sock_fs_type); - /* The real protocol initialization is performed when - * do_initcalls is run. + + /* The real protocol initialization is performed in later initcalls. */ #ifdef CONFIG_NETFILTER @@ -2067,6 +2065,8 @@ void __init sock_init(void) #endif } +core_initcall(sock_init); /* early initcall */ + #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { -- cgit v1.2.3-70-g09d2 From 90ddc4f0470427df306f308ad03db6b6b21644b8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Dec 2005 12:49:22 -0800 Subject: [NET]: move struct proto_ops to const I noticed that some of 'struct proto_ops' used in the kernel may share a cache line used by locks or other heavily modified data. (default linker alignement is 32 bytes, and L1_CACHE_LINE is 64 or 128 at least) This patch makes sure a 'struct proto_ops' can be declared as const, so that all cpus can share all parts of it without false sharing. This is not mandatory : a driver can still use a read/write structure if it needs to (and eventually a __read_mostly) I made a global stubstitute to change all existing occurences to make them const. This should reduce the possibility of false sharing on SMP, and speedup some socket system calls. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/net.h | 4 ++-- include/net/inet_common.h | 4 ++-- include/net/ipv6.h | 4 ++-- include/net/protocol.h | 2 +- net/appletalk/ddp.c | 4 ++-- net/atm/pvc.c | 2 +- net/atm/svc.c | 2 +- net/ax25/af_ax25.c | 4 ++-- net/bluetooth/bnep/sock.c | 2 +- net/bluetooth/cmtp/sock.c | 2 +- net/bluetooth/hci_sock.c | 2 +- net/bluetooth/hidp/sock.c | 2 +- net/bluetooth/l2cap.c | 4 ++-- net/bluetooth/rfcomm/sock.c | 4 ++-- net/bluetooth/sco.c | 4 ++-- net/dccp/proto.c | 2 +- net/decnet/af_decnet.c | 4 ++-- net/econet/af_econet.c | 4 ++-- net/ipv4/af_inet.c | 6 +++--- net/ipv6/af_inet6.c | 6 +++--- net/ipx/af_ipx.c | 4 ++-- net/irda/af_irda.c | 16 ++++++++-------- net/key/af_key.c | 4 ++-- net/llc/af_llc.c | 4 ++-- net/netlink/af_netlink.c | 4 ++-- net/netrom/af_netrom.c | 4 ++-- net/packet/af_packet.c | 8 ++++---- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- net/sunrpc/svcsock.c | 2 +- net/unix/af_unix.c | 6 +++--- net/wanrouter/af_wanpipe.c | 4 ++-- net/x25/af_x25.c | 4 ++-- 33 files changed, 66 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index d6a41e6577f6..28195a2d8ff0 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -107,7 +107,7 @@ enum sock_type { struct socket { socket_state state; unsigned long flags; - struct proto_ops *ops; + const struct proto_ops *ops; struct fasync_struct *fasync_list; struct file *file; struct sock *sk; @@ -260,7 +260,7 @@ SOCKCALL_WRAP(name, recvmsg, (struct kiocb *iocb, struct socket *sock, struct ms SOCKCALL_WRAP(name, mmap, (struct file *file, struct socket *sock, struct vm_area_struct *vma), \ (file, sock, vma)) \ \ -static struct proto_ops name##_ops = { \ +static const struct proto_ops name##_ops = { \ .family = fam, \ .owner = THIS_MODULE, \ .release = __lock_##name##_release, \ diff --git a/include/net/inet_common.h b/include/net/inet_common.h index f943306ce5ff..227adcbdfec8 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -1,8 +1,8 @@ #ifndef _INET_COMMON_H #define _INET_COMMON_H -extern struct proto_ops inet_stream_ops; -extern struct proto_ops inet_dgram_ops; +extern const struct proto_ops inet_stream_ops; +extern const struct proto_ops inet_dgram_ops; /* * INET4 prototypes used by INET6 diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e3d5d7bc8837..11a725662c36 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -538,8 +538,8 @@ extern int sysctl_ip6frag_low_thresh; extern int sysctl_ip6frag_time; extern int sysctl_ip6frag_secret_interval; -extern struct proto_ops inet6_stream_ops; -extern struct proto_ops inet6_dgram_ops; +extern const struct proto_ops inet6_stream_ops; +extern const struct proto_ops inet6_dgram_ops; extern int ip6_mc_source(int add, int omode, struct sock *sk, struct group_source_req *pgsr); diff --git a/include/net/protocol.h b/include/net/protocol.h index a29cb29647d0..63f7db99c2a6 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -65,7 +65,7 @@ struct inet_protosw { int protocol; /* This is the L4 protocol number. */ struct proto *prot; - struct proto_ops *ops; + const struct proto_ops *ops; int capability; /* Which (if any) capability do * we need to use this socket diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 7982656b9c83..296f186802ff 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -63,7 +63,7 @@ #include struct datalink_proto *ddp_dl, *aarp_dl; -static struct proto_ops atalk_dgram_ops; +static const struct proto_ops atalk_dgram_ops; /**************************************************************************\ * * @@ -1841,7 +1841,7 @@ static struct net_proto_family atalk_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = { .family = PF_APPLETALK, .owner = THIS_MODULE, .release = atalk_release, diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 2684a92da22b..f2c541774dcd 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -102,7 +102,7 @@ static int pvc_getname(struct socket *sock,struct sockaddr *sockaddr, } -static struct proto_ops pvc_proto_ops = { +static const struct proto_ops pvc_proto_ops = { .family = PF_ATMPVC, .owner = THIS_MODULE, diff --git a/net/atm/svc.c b/net/atm/svc.c index d7b266136bf6..3a180cfd7b48 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -613,7 +613,7 @@ static int svc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return error; } -static struct proto_ops svc_proto_ops = { +static const struct proto_ops svc_proto_ops = { .family = PF_ATMSVC, .owner = THIS_MODULE, diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 1b683f302657..8b5d10aaba05 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -54,7 +54,7 @@ HLIST_HEAD(ax25_list); DEFINE_SPINLOCK(ax25_list_lock); -static struct proto_ops ax25_proto_ops; +static const struct proto_ops ax25_proto_ops; static void ax25_free_sock(struct sock *sk) { @@ -1944,7 +1944,7 @@ static struct net_proto_family ax25_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops ax25_proto_ops = { +static const struct proto_ops ax25_proto_ops = { .family = PF_AX25, .owner = THIS_MODULE, .release = ax25_release, diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 9778c6acd53b..ccbaf69afc5b 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -146,7 +146,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long return 0; } -static struct proto_ops bnep_sock_ops = { +static const struct proto_ops bnep_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = bnep_sock_release, diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index beb045bf5714..5e22343b6090 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -137,7 +137,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long return -EINVAL; } -static struct proto_ops cmtp_sock_ops = { +static const struct proto_ops cmtp_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = cmtp_sock_release, diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1d6d0a15c099..84e6c93a044a 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -575,7 +575,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, char return 0; } -static struct proto_ops hci_sock_ops = { +static const struct proto_ops hci_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = hci_sock_release, diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index f8986f881431..8f8dd931b294 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -143,7 +143,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long return -EINVAL; } -static struct proto_ops hidp_sock_ops = { +static const struct proto_ops hidp_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = hidp_sock_release, diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 95f33cc7a24e..7f0781e4326f 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -57,7 +57,7 @@ #define VERSION "2.8" -static struct proto_ops l2cap_sock_ops; +static const struct proto_ops l2cap_sock_ops; static struct bt_sock_list l2cap_sk_list = { .lock = RW_LOCK_UNLOCKED @@ -2161,7 +2161,7 @@ static ssize_t l2cap_sysfs_show(struct class *dev, char *buf) static CLASS_ATTR(l2cap, S_IRUGO, l2cap_sysfs_show, NULL); -static struct proto_ops l2cap_sock_ops = { +static const struct proto_ops l2cap_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = l2cap_sock_release, diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 6c34261b232e..757d2dd3b02f 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -58,7 +58,7 @@ #define BT_DBG(D...) #endif -static struct proto_ops rfcomm_sock_ops; +static const struct proto_ops rfcomm_sock_ops; static struct bt_sock_list rfcomm_sk_list = { .lock = RW_LOCK_UNLOCKED @@ -907,7 +907,7 @@ static ssize_t rfcomm_sock_sysfs_show(struct class *dev, char *buf) static CLASS_ATTR(rfcomm, S_IRUGO, rfcomm_sock_sysfs_show, NULL); -static struct proto_ops rfcomm_sock_ops = { +static const struct proto_ops rfcomm_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = rfcomm_sock_release, diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 648181430699..6b61323ce23c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -56,7 +56,7 @@ #define VERSION "0.5" -static struct proto_ops sco_sock_ops; +static const struct proto_ops sco_sock_ops; static struct bt_sock_list sco_sk_list = { .lock = RW_LOCK_UNLOCKED @@ -914,7 +914,7 @@ static ssize_t sco_sysfs_show(struct class *dev, char *buf) static CLASS_ATTR(sco, S_IRUGO, sco_sysfs_show, NULL); -static struct proto_ops sco_sock_ops = { +static const struct proto_ops sco_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = sco_sock_release, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 40a4c6899051..e4e629ed9bf7 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -680,7 +680,7 @@ void dccp_shutdown(struct sock *sk, int how) EXPORT_SYMBOL_GPL(dccp_shutdown); -static struct proto_ops inet_dccp_ops = { +static const struct proto_ops inet_dccp_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index d402e9020c68..65e3baed0251 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -149,7 +149,7 @@ static void dn_keepalive(struct sock *sk); #define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1) -static struct proto_ops dn_proto_ops; +static const struct proto_ops dn_proto_ops; static DEFINE_RWLOCK(dn_hash_lock); static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; static struct hlist_head dn_wild_sk; @@ -2342,7 +2342,7 @@ static struct net_proto_family dn_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops dn_proto_ops = { +static const struct proto_ops dn_proto_ops = { .family = AF_DECnet, .owner = THIS_MODULE, .release = dn_release, diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 34fdac51df96..ff58f49c8b4a 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -45,7 +45,7 @@ #include #include -static struct proto_ops econet_ops; +static const struct proto_ops econet_ops; static struct hlist_head econet_sklist; static DEFINE_RWLOCK(econet_lock); @@ -698,7 +698,7 @@ static struct net_proto_family econet_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(econet_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(econet_ops) = { .family = PF_ECONET, .owner = THIS_MODULE, .release = econet_release, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 617e858beff1..4ed8a814c6cb 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -785,7 +785,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return err; } -struct proto_ops inet_stream_ops = { +const struct proto_ops inet_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, @@ -806,7 +806,7 @@ struct proto_ops inet_stream_ops = { .sendpage = tcp_sendpage }; -struct proto_ops inet_dgram_ops = { +const struct proto_ops inet_dgram_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, @@ -831,7 +831,7 @@ struct proto_ops inet_dgram_ops = { * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without * udp_poll */ -static struct proto_ops inet_sockraw_ops = { +static const struct proto_ops inet_sockraw_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 70a510ff31ee..7c9f19269f21 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -462,7 +462,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return(0); } -struct proto_ops inet6_stream_ops = { +const struct proto_ops inet6_stream_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, @@ -483,7 +483,7 @@ struct proto_ops inet6_stream_ops = { .sendpage = tcp_sendpage }; -struct proto_ops inet6_dgram_ops = { +const struct proto_ops inet6_dgram_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, @@ -511,7 +511,7 @@ static struct net_proto_family inet6_family_ops = { }; /* Same as inet6_dgram_ops, sans udp_poll. */ -static struct proto_ops inet6_sockraw_ops = { +static const struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 34b3bb868409..6c464c11bb09 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -75,7 +75,7 @@ static struct datalink_proto *pEII_datalink; static struct datalink_proto *p8023_datalink; static struct datalink_proto *pSNAP_datalink; -static struct proto_ops ipx_dgram_ops; +static const struct proto_ops ipx_dgram_ops; LIST_HEAD(ipx_interfaces); DEFINE_SPINLOCK(ipx_interfaces_lock); @@ -1901,7 +1901,7 @@ static struct net_proto_family ipx_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { .family = PF_IPX, .owner = THIS_MODULE, .release = ipx_release, diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index f121f7de2032..e57683d424f7 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -62,12 +62,12 @@ static int irda_create(struct socket *sock, int protocol); -static struct proto_ops irda_stream_ops; -static struct proto_ops irda_seqpacket_ops; -static struct proto_ops irda_dgram_ops; +static const struct proto_ops irda_stream_ops; +static const struct proto_ops irda_seqpacket_ops; +static const struct proto_ops irda_dgram_ops; #ifdef CONFIG_IRDA_ULTRA -static struct proto_ops irda_ultra_ops; +static const struct proto_ops irda_ultra_ops; #define ULTRA_MAX_DATA 382 #endif /* CONFIG_IRDA_ULTRA */ @@ -2464,7 +2464,7 @@ static struct net_proto_family irda_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, @@ -2485,7 +2485,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { .sendpage = sock_no_sendpage, }; -static struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, @@ -2506,7 +2506,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { .sendpage = sock_no_sendpage, }; -static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, @@ -2528,7 +2528,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { }; #ifdef CONFIG_IRDA_ULTRA -static struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, diff --git a/net/key/af_key.c b/net/key/af_key.c index d32f7791f1e4..52efd04cbedb 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -113,7 +113,7 @@ static __inline__ void pfkey_unlock_table(void) } -static struct proto_ops pfkey_ops; +static const struct proto_ops pfkey_ops; static void pfkey_insert(struct sock *sk) { @@ -3127,7 +3127,7 @@ out: return err; } -static struct proto_ops pfkey_ops = { +static const struct proto_ops pfkey_ops = { .family = PF_KEY, .owner = THIS_MODULE, /* Operations that make no sense on pfkey sockets. */ diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index b6d3df5c911c..9cf65f9d8902 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -36,7 +36,7 @@ static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START; static u16 llc_ui_sap_link_no_max[256]; static struct sockaddr_llc llc_ui_addrnull; -static struct proto_ops llc_ui_ops; +static const struct proto_ops llc_ui_ops; static int llc_ui_wait_for_conn(struct sock *sk, long timeout); static int llc_ui_wait_for_disc(struct sock *sk, long timeout); @@ -1098,7 +1098,7 @@ static struct net_proto_family llc_ui_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops llc_ui_ops = { +static const struct proto_ops llc_ui_ops = { .family = PF_LLC, .owner = THIS_MODULE, .release = llc_ui_release, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 96020d7087e8..7849cac14d3a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -293,7 +293,7 @@ static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len) return 0; } -static struct proto_ops netlink_ops; +static const struct proto_ops netlink_ops; static int netlink_insert(struct sock *sk, u32 pid) { @@ -1656,7 +1656,7 @@ int netlink_unregister_notifier(struct notifier_block *nb) return notifier_chain_unregister(&netlink_chain, nb); } -static struct proto_ops netlink_ops = { +static const struct proto_ops netlink_ops = { .family = PF_NETLINK, .owner = THIS_MODULE, .release = netlink_release, diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index e5d82d711cae..05b653c05971 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -63,7 +63,7 @@ static unsigned short circuit = 0x101; static HLIST_HEAD(nr_list); static DEFINE_SPINLOCK(nr_list_lock); -static struct proto_ops nr_proto_ops; +static const struct proto_ops nr_proto_ops; /* * Socket removal during an interrupt is now safe. @@ -1337,7 +1337,7 @@ static struct net_proto_family nr_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops nr_proto_ops = { +static const struct proto_ops nr_proto_ops = { .family = PF_NETROM, .owner = THIS_MODULE, .release = nr_release, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3e2462760413..deda6fdb1e53 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -251,10 +251,10 @@ static void packet_sock_destruct(struct sock *sk) } -static struct proto_ops packet_ops; +static const struct proto_ops packet_ops; #ifdef CONFIG_SOCK_PACKET -static struct proto_ops packet_ops_spkt; +static const struct proto_ops packet_ops_spkt; static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { @@ -1784,7 +1784,7 @@ out: #ifdef CONFIG_SOCK_PACKET -static struct proto_ops packet_ops_spkt = { +static const struct proto_ops packet_ops_spkt = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, @@ -1806,7 +1806,7 @@ static struct proto_ops packet_ops_spkt = { }; #endif -static struct proto_ops packet_ops = { +static const struct proto_ops packet_ops = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index fa3be2b8fb5f..15c05165c905 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -866,7 +866,7 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt, return 2; } -static struct proto_ops inet6_seqpacket_ops = { +static const struct proto_ops inet6_seqpacket_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f775d78aa59d..d1b0747a5b9d 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -829,7 +829,7 @@ static struct notifier_block sctp_inetaddr_notifier = { }; /* Socket operations. */ -static struct proto_ops inet_seqpacket_ops = { +static const struct proto_ops inet_seqpacket_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, /* Needs to be wrapped... */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c6a51911e71e..d68eba481291 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -758,7 +758,7 @@ svc_tcp_accept(struct svc_sock *svsk) struct svc_serv *serv = svsk->sk_server; struct socket *sock = svsk->sk_sock; struct socket *newsock; - struct proto_ops *ops; + const struct proto_ops *ops; struct svc_sock *newsvsk; int err, slen; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 04e850e04e3d..7d3fe6aebcdb 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -473,7 +473,7 @@ static int unix_dgram_connect(struct socket *, struct sockaddr *, static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); -static struct proto_ops unix_stream_ops = { +static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, @@ -494,7 +494,7 @@ static struct proto_ops unix_stream_ops = { .sendpage = sock_no_sendpage, }; -static struct proto_ops unix_dgram_ops = { +static const struct proto_ops unix_dgram_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, @@ -515,7 +515,7 @@ static struct proto_ops unix_dgram_ops = { .sendpage = sock_no_sendpage, }; -static struct proto_ops unix_seqpacket_ops = { +static const struct proto_ops unix_seqpacket_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index 59fec59b2132..67948bf22dc4 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c @@ -181,7 +181,7 @@ struct wanpipe_opt #endif static int sk_count; -extern struct proto_ops wanpipe_ops; +extern const struct proto_ops wanpipe_ops; static unsigned long find_free_critical; static void wanpipe_unlink_driver(struct sock *sk); @@ -2546,7 +2546,7 @@ static int wanpipe_connect(struct socket *sock, struct sockaddr *uaddr, int addr return 0; } -struct proto_ops wanpipe_ops = { +const struct proto_ops wanpipe_ops = { .family = PF_WANPIPE, .owner = THIS_MODULE, .release = wanpipe_release, diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 020d73cc8414..ca8b3b0b920d 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -64,7 +64,7 @@ int sysctl_x25_ack_holdback_timeout = X25_DEFAULT_T2; HLIST_HEAD(x25_list); DEFINE_RWLOCK(x25_list_lock); -static struct proto_ops x25_proto_ops; +static const struct proto_ops x25_proto_ops; static struct x25_address null_x25_address = {" "}; @@ -1391,7 +1391,7 @@ static struct net_proto_family x25_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { .family = AF_X25, .owner = THIS_MODULE, .release = x25_release, -- cgit v1.2.3-70-g09d2 From 25995ff577675b58dbd848b7758e7bad87411947 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 27 Dec 2005 02:42:22 -0200 Subject: [SOCK]: Introduce sk_receive_skb Its common enough to to justify that, TCP still can't use it as it has the prequeueing stuff, still to be made generic in the not so distant future :-) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- drivers/net/pppoe.c | 22 ++-------------------- include/net/sock.h | 23 +++++++++++++++++++++++ net/dccp/ipv4.c | 23 ++--------------------- net/dccp/ipv6.c | 17 +---------------- net/decnet/dn_nsp_in.c | 17 +---------------- 5 files changed, 29 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index a842ecc60a34..71e303b28646 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -383,8 +383,6 @@ static int pppoe_rcv(struct sk_buff *skb, { struct pppoe_hdr *ph; struct pppox_sock *po; - struct sock *sk; - int ret; if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto drop; @@ -395,24 +393,8 @@ static int pppoe_rcv(struct sk_buff *skb, ph = (struct pppoe_hdr *) skb->nh.raw; po = get_item((unsigned long) ph->sid, eth_hdr(skb)->h_source); - if (!po) - goto drop; - - sk = sk_pppox(po); - bh_lock_sock(sk); - - /* Socket state is unknown, must put skb into backlog. */ - if (sock_owned_by_user(sk) != 0) { - sk_add_backlog(sk, skb); - ret = NET_RX_SUCCESS; - } else { - ret = pppoe_rcv_core(sk, skb); - } - - bh_unlock_sock(sk); - sock_put(sk); - - return ret; + if (po != NULL) + return sk_receive_skb(sk_pppox(po), skb); drop: kfree_skb(skb); out: diff --git a/include/net/sock.h b/include/net/sock.h index 91d28957dc10..6961700ff3a0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -926,6 +926,29 @@ static inline void sock_put(struct sock *sk) sk_free(sk); } +static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb) +{ + int rc = NET_RX_SUCCESS; + + if (sk_filter(sk, skb, 0)) + goto discard_and_relse; + + skb->dev = NULL; + + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) + rc = sk->sk_backlog_rcv(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); +out: + sock_put(sk); + return rc; +discard_and_relse: + kfree_skb(skb); + goto out; +} + /* Detach socket from process context. * Announce socket dead, detach it from wait queue and inode. * Note that parent inode held reference count on this struct sock, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index c363051a7f16..99e8afa7ba1e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -914,7 +914,6 @@ int dccp_v4_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; struct sock *sk; - int rc; /* Step 1: Check header basics: */ @@ -984,28 +983,10 @@ int dccp_v4_rcv(struct sk_buff *skb) goto do_time_wait; } - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { - dccp_pr_debug("xfrm4_policy_check failed\n"); + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - } - - if (sk_filter(sk, skb, 0)) { - dccp_pr_debug("sk_filter failed\n"); - goto discard_and_relse; - } - - skb->dev = NULL; - bh_lock_sock(sk); - rc = 0; - if (!sock_owned_by_user(sk)) - rc = dccp_v4_do_rcv(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); - - sock_put(sk); - return rc; + return sk_receive_skb(sk, skb); no_dccp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 599b0be21515..2e194c8f9953 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1032,7 +1032,6 @@ static int dccp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) const struct dccp_hdr *dh; struct sk_buff *skb = *pskb; struct sock *sk; - int rc; /* Step 1: Check header basics: */ @@ -1077,21 +1076,7 @@ static int dccp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - if (sk_filter(sk, skb, 0)) - goto discard_and_relse; - - skb->dev = NULL; - - bh_lock_sock(sk); - rc = 0; - if (!sock_owned_by_user(sk)) - rc = dccp_v6_do_rcv(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); - - sock_put(sk); - return rc ? -1 : 0; + return sk_receive_skb(sk, skb) ? -1 : 0; no_dccp_socket: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 369f25b60f3f..44bda85e678f 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -793,7 +793,6 @@ static int dn_nsp_rx_packet(struct sk_buff *skb) got_it: if (sk != NULL) { struct dn_scp *scp = DN_SK(sk); - int ret; /* Reset backoff */ scp->nsp_rxtshift = 0; @@ -807,21 +806,7 @@ got_it: goto free_out; } - bh_lock_sock(sk); - ret = NET_RX_SUCCESS; - if (decnet_debug_level & 8) - printk(KERN_DEBUG "NSP: 0x%02x 0x%02x 0x%04x 0x%04x %d\n", - (int)cb->rt_flags, (int)cb->nsp_flags, - (int)cb->src_port, (int)cb->dst_port, - !!sock_owned_by_user(sk)); - if (!sock_owned_by_user(sk)) - ret = dn_nsp_backlog_rcv(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); - sock_put(sk); - - return ret; + return sk_receive_skb(sk, skb); } return dn_nsp_no_socket(skb, reason); -- cgit v1.2.3-70-g09d2 From 14c850212ed8f8cbb5972ad6b8812e08a0bc901c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 27 Dec 2005 02:43:12 -0200 Subject: [INET_SOCK]: Move struct inet_sock & helper functions to net/inet_sock.h To help in reducing the number of include dependencies, several files were touched as they were getting needed headers indirectly for stuff they use. Thanks also to Alan Menegotto for pointing out that net/dccp/proto.c had linux/dccp.h include twice. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 + drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 + drivers/net/ns83820.c | 1 + drivers/net/sk98lin/skge.c | 1 + drivers/net/skge.c | 1 + drivers/net/tg3.c | 1 + fs/9p/trans_sock.c | 1 + fs/nfs/callback.c | 3 + include/linux/dccp.h | 3 +- include/linux/ip.h | 126 +--------------- include/linux/ipv6.h | 7 +- include/linux/udp.h | 6 +- include/net/atmclip.h | 2 +- include/net/dst.h | 1 + include/net/icmp.h | 9 +- include/net/ieee80211_crypt.h | 9 +- include/net/inet_connection_sock.h | 3 +- include/net/inet_ecn.h | 2 + include/net/inet_hashtables.h | 21 +-- include/net/inet_sock.h | 193 +++++++++++++++++++++++++ include/net/inet_timewait_sock.h | 2 +- include/net/ip.h | 17 ++- include/net/ip_fib.h | 2 + include/net/ip_vs.h | 12 +- include/net/ipv6.h | 3 + include/net/ndisc.h | 17 ++- include/net/neighbour.h | 2 +- include/net/pkt_act.h | 1 - include/net/raw.h | 2 + include/net/udp.h | 4 +- include/net/xfrm.h | 3 +- net/bridge/br_netfilter.c | 4 + net/bridge/netfilter/ebt_log.c | 1 + net/core/netpoll.c | 1 + net/dccp/ccid.h | 2 + net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 1 + net/dccp/output.c | 1 + net/dccp/proto.c | 3 +- net/econet/af_econet.c | 1 + net/ipv4/af_inet.c | 1 + net/ipv4/ah4.c | 1 + net/ipv4/arp.c | 1 + net/ipv4/devinet.c | 1 + net/ipv4/esp4.c | 1 + net/ipv4/fib_frontend.c | 1 + net/ipv4/fib_hash.c | 1 + net/ipv4/fib_rules.c | 1 + net/ipv4/fib_semantics.c | 2 + net/ipv4/icmp.c | 1 + net/ipv4/igmp.c | 2 + net/ipv4/ip_input.c | 1 + net/ipv4/ip_options.c | 1 + net/ipv4/ip_sockglue.c | 1 + net/ipv4/ipcomp.c | 1 + net/ipv4/ipconfig.c | 2 + net/ipv4/ipmr.c | 1 + net/ipv4/ipvs/ip_vs_conn.c | 2 + net/ipv4/ipvs/ip_vs_ctl.c | 1 + net/ipv4/ipvs/ip_vs_dh.c | 2 + net/ipv4/ipvs/ip_vs_est.c | 3 + net/ipv4/ipvs/ip_vs_lblc.c | 2 + net/ipv4/ipvs/ip_vs_lblcr.c | 2 + net/ipv4/ipvs/ip_vs_proto_ah.c | 2 + net/ipv4/ipvs/ip_vs_proto_esp.c | 2 + net/ipv4/ipvs/ip_vs_proto_udp.c | 3 + net/ipv4/ipvs/ip_vs_sh.c | 2 + net/ipv4/ipvs/ip_vs_sync.c | 2 + net/ipv4/netfilter/ip_conntrack_amanda.c | 2 + net/ipv4/netfilter/ip_conntrack_proto_gre.c | 1 + net/ipv4/netfilter/ip_conntrack_proto_udp.c | 1 + net/ipv4/netfilter/ip_conntrack_standalone.c | 1 + net/ipv4/netfilter/ip_nat_snmp_basic.c | 2 + net/ipv4/netfilter/ipt_MASQUERADE.c | 2 + net/ipv4/netfilter/ipt_physdev.c | 1 + net/ipv4/proc.c | 1 + net/ipv4/sysctl_net_ipv4.c | 1 + net/ipv4/udp.c | 1 + net/ipv6/ah6.c | 1 + net/ipv6/esp6.c | 1 + net/ipv6/ipcomp6.c | 1 + net/ipv6/netfilter/ip6_tables.c | 1 + net/ipv6/netfilter/ip6t_LOG.c | 1 + net/ipv6/netfilter/ip6t_ah.c | 1 + net/ipv6/netfilter/ip6t_esp.c | 1 + net/sched/sch_teql.c | 1 + net/sctp/protocol.c | 1 + 87 files changed, 355 insertions(+), 184 deletions(-) create mode 100644 include/net/inet_sock.h (limited to 'include') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 475d98fa9e26..780009c7eaa6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -47,6 +47,8 @@ #include #include +#include + MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index ef3ee035bbc8..ed0c2ead8bc1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -43,6 +43,8 @@ #include #include +#include + #include "ipoib.h" #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index f857ae94d261..b0c3b6ab6263 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -115,6 +115,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c index ae7343934758..e1a2d52cc1fe 100644 --- a/drivers/net/sk98lin/skge.c +++ b/drivers/net/sk98lin/skge.c @@ -107,6 +107,7 @@ #include "h/skversion.h" +#include #include #include #include diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 00d683063c01..d8cc3aea032a 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -25,6 +25,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 2fc9893d69e1..59d916ccc810 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c index a93c2bf94c33..6a9a75d40f73 100644 --- a/fs/9p/trans_sock.c +++ b/fs/9p/trans_sock.c @@ -26,6 +26,7 @@ */ #include +#include #include #include #include diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index f2ca782aba33..30cae3602867 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -14,6 +14,9 @@ #include #include #include + +#include + #include "nfs4_fs.h" #include "callback.h" diff --git a/include/linux/dccp.h b/include/linux/dccp.h index d0bdb499cf8d..088529f54965 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -192,10 +192,9 @@ enum { #include #include +#include #include -#include #include -#include enum dccp_state { DCCP_OPEN = TCP_ESTABLISHED, diff --git a/include/linux/ip.h b/include/linux/ip.h index 6ccc596c19c8..9e2eb9a602eb 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -16,6 +16,7 @@ */ #ifndef _LINUX_IP_H #define _LINUX_IP_H +#include #include #define IPTOS_TOS_MASK 0x1E @@ -78,131 +79,6 @@ #define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ #define IPOPT_TS_PRESPEC 3 /* specified modules only */ -#ifdef __KERNEL__ -#include -#include -#include -#include -#include -#include - -struct ip_options { - __u32 faddr; /* Saved first hop address */ - unsigned char optlen; - unsigned char srr; - unsigned char rr; - unsigned char ts; - unsigned char is_setbyuser:1, /* Set by setsockopt? */ - is_data:1, /* Options in __data, rather than skb */ - is_strictroute:1, /* Strict source route */ - srr_is_hit:1, /* Packet destination addr was our one */ - is_changed:1, /* IP checksum more not valid */ - rr_needaddr:1, /* Need to record addr of outgoing dev */ - ts_needtime:1, /* Need to record timestamp */ - ts_needaddr:1; /* Need to record addr of outgoing dev */ - unsigned char router_alert; - unsigned char __pad1; - unsigned char __pad2; - unsigned char __data[0]; -}; - -#define optlength(opt) (sizeof(struct ip_options) + opt->optlen) - -struct inet_request_sock { - struct request_sock req; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - u16 inet6_rsk_offset; - /* 2 bytes hole, try to pack */ -#endif - u32 loc_addr; - u32 rmt_addr; - u16 rmt_port; - u16 snd_wscale : 4, - rcv_wscale : 4, - tstamp_ok : 1, - sack_ok : 1, - wscale_ok : 1, - ecn_ok : 1, - acked : 1; - struct ip_options *opt; -}; - -static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) -{ - return (struct inet_request_sock *)sk; -} - -struct ipv6_pinfo; - -struct inet_sock { - /* sk and pinet6 has to be the first two members of inet_sock */ - struct sock sk; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct ipv6_pinfo *pinet6; -#endif - /* Socket demultiplex comparisons on incoming packets. */ - __u32 daddr; /* Foreign IPv4 addr */ - __u32 rcv_saddr; /* Bound local IPv4 addr */ - __u16 dport; /* Destination port */ - __u16 num; /* Local port */ - __u32 saddr; /* Sending source */ - __s16 uc_ttl; /* Unicast TTL */ - __u16 cmsg_flags; - struct ip_options *opt; - __u16 sport; /* Source port */ - __u16 id; /* ID counter for DF pkts */ - __u8 tos; /* TOS */ - __u8 mc_ttl; /* Multicasting TTL */ - __u8 pmtudisc; - unsigned recverr : 1, - is_icsk : 1, /* inet_connection_sock? */ - freebind : 1, - hdrincl : 1, - mc_loop : 1; - int mc_index; /* Multicast device index */ - __u32 mc_addr; - struct ip_mc_socklist *mc_list; /* Group array */ - /* - * Following members are used to retain the infomation to build - * an ip header on each ip fragmentation while the socket is corked. - */ - struct { - unsigned int flags; - unsigned int fragsize; - struct ip_options *opt; - struct rtable *rt; - int length; /* Total length of all frames */ - u32 addr; - struct flowi fl; - } cork; -}; - -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ -#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */ - -static inline struct inet_sock *inet_sk(const struct sock *sk) -{ - return (struct inet_sock *)sk; -} - -static inline void __inet_sk_copy_descendant(struct sock *sk_to, - const struct sock *sk_from, - const int ancestor_size) -{ - memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1, - sk_from->sk_prot->obj_size - ancestor_size); -} -#if !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) -static inline void inet_sk_copy_descendant(struct sock *sk_to, - const struct sock *sk_from) -{ - __inet_sk_copy_descendant(sk_to, sk_from, sizeof(struct inet_sock)); -} -#endif -#endif - -extern int inet_sk_rebuild_header(struct sock *sk); - struct iphdr { #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 ihl:4, diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a0d04891fe12..93bbed5c6cf4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -171,12 +171,13 @@ enum { }; #ifdef __KERNEL__ -#include /* struct sockaddr_in6 */ #include -#include /* struct ipv6_mc_socklist */ #include #include +#include /* struct ipv6_mc_socklist */ +#include + /* This structure contains results of exthdrs parsing as offsets from skb->nh. @@ -346,8 +347,6 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) #define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) -#include - struct inet6_timewait_sock { struct in6_addr tw_v6_daddr; struct in6_addr tw_v6_rcv_saddr; diff --git a/include/linux/udp.h b/include/linux/udp.h index b60e0b4a25c4..85a55658831c 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -35,10 +35,10 @@ struct udphdr { #define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-06 */ #ifdef __KERNEL__ - #include -#include -#include +#include + +#include struct udp_sock { /* inet_sock has to be the first member */ diff --git a/include/net/atmclip.h b/include/net/atmclip.h index 47048b1d179a..90fcc98e676f 100644 --- a/include/net/atmclip.h +++ b/include/net/atmclip.h @@ -7,7 +7,6 @@ #define _ATMCLIP_H #include -#include #include #include #include @@ -18,6 +17,7 @@ #define CLIP_VCC(vcc) ((struct clip_vcc *) ((vcc)->user_back)) #define NEIGH2ENTRY(neigh) ((struct atmarp_entry *) (neigh)->primary_key) +struct sk_buff; struct clip_vcc { struct atm_vcc *vcc; /* VCC descriptor */ diff --git a/include/net/dst.h b/include/net/dst.h index 6c196a5baf24..bee8b84d329d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -9,6 +9,7 @@ #define _NET_DST_H #include +#include #include #include #include diff --git a/include/net/icmp.h b/include/net/icmp.h index 6cdebeee5f96..e7c3f20fbafc 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -20,12 +20,9 @@ #include #include -#include -#include -#include +#include #include -#include struct icmp_err { int errno; @@ -38,6 +35,10 @@ DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics); #define ICMP_INC_STATS_BH(field) SNMP_INC_STATS_BH(icmp_statistics, field) #define ICMP_INC_STATS_USER(field) SNMP_INC_STATS_USER(icmp_statistics, field) +struct dst_entry; +struct net_proto_family; +struct sk_buff; + extern void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info); extern int icmp_rcv(struct sk_buff *skb); extern int icmp_ioctl(struct sock *sk, int cmd, unsigned long arg); diff --git a/include/net/ieee80211_crypt.h b/include/net/ieee80211_crypt.h index 225fc751d464..03b766afdc39 100644 --- a/include/net/ieee80211_crypt.h +++ b/include/net/ieee80211_crypt.h @@ -23,12 +23,17 @@ #ifndef IEEE80211_CRYPT_H #define IEEE80211_CRYPT_H -#include +#include +#include +#include enum { IEEE80211_CRYPTO_TKIP_COUNTERMEASURES = (1 << 0), }; +struct sk_buff; +struct module; + struct ieee80211_crypto_ops { const char *name; struct list_head list; @@ -87,6 +92,8 @@ struct ieee80211_crypt_data { atomic_t refcnt; }; +struct ieee80211_device; + int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops); int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops); struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 91888967d3e3..50234fa56a68 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -16,9 +16,10 @@ #define _INET_CONNECTION_SOCK_H #include -#include #include #include + +#include #include #define INET_CSK_DEBUG 1 diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index b0c47e2eccf1..d599c6bfbb86 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -3,6 +3,8 @@ #include #include + +#include #include enum { diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index c83baa79f66e..135d80fd658e 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -128,26 +129,6 @@ struct inet_hashinfo { kmem_cache_t *bind_bucket_cachep; }; -static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, - const __u32 faddr, const __u16 fport) -{ - unsigned int h = (laddr ^ lport) ^ (faddr ^ fport); - h ^= h >> 16; - h ^= h >> 8; - return h; -} - -static inline int inet_sk_ehashfn(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const __u32 laddr = inet->rcv_saddr; - const __u16 lport = inet->num; - const __u32 faddr = inet->daddr; - const __u16 fport = inet->dport; - - return inet_ehashfn(laddr, lport, faddr, fport); -} - static inline struct inet_ehash_bucket *inet_ehash_bucket( struct inet_hashinfo *hashinfo, unsigned int hash) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h new file mode 100644 index 000000000000..883eb529ef8e --- /dev/null +++ b/include/net/inet_sock.h @@ -0,0 +1,193 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for inet_sock + * + * Authors: Many, reorganised here by + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET_SOCK_H +#define _INET_SOCK_H + +#include + +#include +#include + +#include +#include +#include + +/** struct ip_options - IP Options + * + * @faddr - Saved first hop address + * @is_setbyuser - Set by setsockopt? + * @is_data - Options in __data, rather than skb + * @is_strictroute - Strict source route + * @srr_is_hit - Packet destination addr was our one + * @is_changed - IP checksum more not valid + * @rr_needaddr - Need to record addr of outgoing dev + * @ts_needtime - Need to record timestamp + * @ts_needaddr - Need to record addr of outgoing dev + */ +struct ip_options { + __u32 faddr; + unsigned char optlen; + unsigned char srr; + unsigned char rr; + unsigned char ts; + unsigned char is_setbyuser:1, + is_data:1, + is_strictroute:1, + srr_is_hit:1, + is_changed:1, + rr_needaddr:1, + ts_needtime:1, + ts_needaddr:1; + unsigned char router_alert; + unsigned char __pad1; + unsigned char __pad2; + unsigned char __data[0]; +}; + +#define optlength(opt) (sizeof(struct ip_options) + opt->optlen) + +struct inet_request_sock { + struct request_sock req; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + u16 inet6_rsk_offset; + /* 2 bytes hole, try to pack */ +#endif + u32 loc_addr; + u32 rmt_addr; + u16 rmt_port; + u16 snd_wscale : 4, + rcv_wscale : 4, + tstamp_ok : 1, + sack_ok : 1, + wscale_ok : 1, + ecn_ok : 1, + acked : 1; + struct ip_options *opt; +}; + +static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) +{ + return (struct inet_request_sock *)sk; +} + +struct ip_mc_socklist; +struct ipv6_pinfo; +struct rtable; + +/** struct inet_sock - representation of INET sockets + * + * @sk - ancestor class + * @pinet6 - pointer to IPv6 control block + * @daddr - Foreign IPv4 addr + * @rcv_saddr - Bound local IPv4 addr + * @dport - Destination port + * @num - Local port + * @saddr - Sending source + * @uc_ttl - Unicast TTL + * @sport - Source port + * @id - ID counter for DF pkts + * @tos - TOS + * @mc_ttl - Multicasting TTL + * @is_icsk - is this an inet_connection_sock? + * @mc_index - Multicast device index + * @mc_list - Group array + * @cork - info to build ip hdr on each ip frag while socket is corked + */ +struct inet_sock { + /* sk and pinet6 has to be the first two members of inet_sock */ + struct sock sk; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct ipv6_pinfo *pinet6; +#endif + /* Socket demultiplex comparisons on incoming packets. */ + __u32 daddr; + __u32 rcv_saddr; + __u16 dport; + __u16 num; + __u32 saddr; + __s16 uc_ttl; + __u16 cmsg_flags; + struct ip_options *opt; + __u16 sport; + __u16 id; + __u8 tos; + __u8 mc_ttl; + __u8 pmtudisc; + __u8 recverr:1, + is_icsk:1, + freebind:1, + hdrincl:1, + mc_loop:1; + int mc_index; + __u32 mc_addr; + struct ip_mc_socklist *mc_list; + struct { + unsigned int flags; + unsigned int fragsize; + struct ip_options *opt; + struct rtable *rt; + int length; /* Total length of all frames */ + u32 addr; + struct flowi fl; + } cork; +}; + +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */ + +static inline struct inet_sock *inet_sk(const struct sock *sk) +{ + return (struct inet_sock *)sk; +} + +static inline void __inet_sk_copy_descendant(struct sock *sk_to, + const struct sock *sk_from, + const int ancestor_size) +{ + memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1, + sk_from->sk_prot->obj_size - ancestor_size); +} +#if !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) +static inline void inet_sk_copy_descendant(struct sock *sk_to, + const struct sock *sk_from) +{ + __inet_sk_copy_descendant(sk_to, sk_from, sizeof(struct inet_sock)); +} +#endif + +extern int inet_sk_rebuild_header(struct sock *sk); + +static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, + const __u32 faddr, const __u16 fport) +{ + unsigned int h = (laddr ^ lport) ^ (faddr ^ fport); + h ^= h >> 16; + h ^= h >> 8; + return h; +} + +static inline int inet_sk_ehashfn(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const __u32 laddr = inet->rcv_saddr; + const __u16 lport = inet->num; + const __u32 faddr = inet->daddr; + const __u16 fport = inet->dport; + + return inet_ehashfn(laddr, lport, faddr, fport); +} + +#endif /* _INET_SOCK_H */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index e396a65473d7..1da294c47522 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -17,13 +17,13 @@ #include -#include #include #include #include #include #include +#include #include #include #include diff --git a/include/net/ip.h b/include/net/ip.h index 4d6294ba038e..f7e7fd728b67 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -24,14 +24,10 @@ #include #include -#include #include #include -#include -#include -#include -#include -#include + +#include #include struct sock; @@ -75,6 +71,13 @@ extern rwlock_t ip_ra_lock; #define IP_FRAG_TIME (30 * HZ) /* fragment lifetime */ +struct msghdr; +struct net_device; +struct packet_type; +struct rtable; +struct sk_buff; +struct sockaddr; + extern void ip_mc_dropsocket(struct sock *); extern void ip_mc_dropdevice(struct net_device *dev); extern int igmp_mc_proc_init(void); @@ -184,6 +187,8 @@ extern int sysctl_ip_dynaddr; extern void ipfrag_init(void); #ifdef CONFIG_INET +#include + /* The function in 2.2 was invalid, producing wrong result for * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */ static inline diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 14de4ebd1211..e000fa2cd5f6 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -238,6 +238,8 @@ extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, struct net_device *dev, u32 *spec_dst, u32 *itag); extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); +struct rtentry; + /* Exported by fib_semantics.c */ extern int ip_fib_check_default(u32 gw, struct net_device *dev); extern int fib_sync_down(u32 local, struct net_device *dev, int force); diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3b5559a023a4..7d2674fde19a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -251,16 +251,15 @@ struct ip_vs_daemon_user { #include #include /* for struct list_head */ #include /* for struct rwlock_t */ -#include /* for struct sk_buff */ -#include /* for struct iphdr */ #include /* for struct atomic_t */ -#include /* for struct neighbour */ -#include /* for struct dst_entry */ -#include #include +#include +#include #ifdef CONFIG_IP_VS_DEBUG +#include + extern int ip_vs_get_debug_level(void); #define IP_VS_DBG(level, msg...) \ do { \ @@ -429,8 +428,11 @@ struct ip_vs_stats spinlock_t lock; /* spin lock */ }; +struct dst_entry; +struct iphdr; struct ip_vs_conn; struct ip_vs_app; +struct sk_buff; struct ip_vs_protocol { struct ip_vs_protocol *next; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 11a725662c36..860bbac4c4ee 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -541,6 +541,9 @@ extern int sysctl_ip6frag_secret_interval; extern const struct proto_ops inet6_stream_ops; extern const struct proto_ops inet6_dgram_ops; +struct group_source_req; +struct group_filter; + extern int ip6_mc_source(int add, int omode, struct sock *sk, struct group_source_req *pgsr); extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); diff --git a/include/net/ndisc.h b/include/net/ndisc.h index f85d6e4b7442..bbac87eeb422 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -35,11 +35,20 @@ enum { #ifdef __KERNEL__ -#include -#include +#include +#include #include +#include +#include + #include -#include + +struct ctl_table; +struct file; +struct inet6_dev; +struct net_device; +struct net_proto_family; +struct sk_buff; extern struct neigh_table nd_tbl; @@ -108,7 +117,7 @@ extern int igmp6_event_report(struct sk_buff *skb); extern void igmp6_cleanup(void); #ifdef CONFIG_SYSCTL -extern int ndisc_ifinfo_sysctl_change(ctl_table *ctl, +extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 34c07731933d..6fa9ae190741 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -49,8 +49,8 @@ #ifdef __KERNEL__ #include -#include #include +#include #include #include diff --git a/include/net/pkt_act.h b/include/net/pkt_act.h index bd08964b72c0..b225d8472b7e 100644 --- a/include/net/pkt_act.h +++ b/include/net/pkt_act.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/include/net/raw.h b/include/net/raw.h index f47917469b12..e67b28a0248c 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -19,6 +19,8 @@ #include +#include + extern struct proto raw_prot; extern void raw_err(struct sock *, struct sk_buff *, u32 info); diff --git a/include/net/udp.h b/include/net/udp.h index 107b9d791a1f..766fba1369ce 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -22,9 +22,8 @@ #ifndef _UDP_H #define _UDP_H -#include -#include #include +#include #include #include #include @@ -62,6 +61,7 @@ static inline int udp_lport_inuse(u16 num) extern struct proto udp_prot; +struct sk_buff; extern void udp_err(struct sk_buff *, u32); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 487abca3ca6f..07d7b50cdd76 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -2,11 +2,12 @@ #define _NET_XFRM_H #include +#include #include #include #include #include -#include +#include #include #include #include diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 23422bd53a5e..223f8270daee 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -33,8 +34,11 @@ #include #include #include + #include #include +#include + #include #include #include "br_private.h" diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index c436e6c6242b..9f6e0193ae10 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -9,6 +9,7 @@ * */ +#include #include #include #include diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 49424a42a2c0..281a632fa6a6 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index c37eeeaf5c6e..de681c6ad081 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -21,6 +21,8 @@ #define CCID_MAX 255 +struct tcp_info; + struct ccid { unsigned char ccid_id; const char *ccid_name; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 99e8afa7ba1e..3f244670764a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 2e194c8f9953..c609dc78f487 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/net/dccp/output.c b/net/dccp/output.c index 95a3c2c6a3ce..efd7ffb903a1 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -15,6 +15,7 @@ #include #include +#include #include #include "ackvec.h" diff --git a/net/dccp/proto.c b/net/dccp/proto.c index e4e629ed9bf7..65b11ea90d85 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include @@ -34,7 +34,6 @@ #include #include #include -#include #include "ccid.h" #include "dccp.h" diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index ff58f49c8b4a..70fb2b88da65 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4ed8a814c6cb..36a6306ca5a3 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -93,6 +93,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 035ad2c9e1ba..aed537fa2c88 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -6,6 +6,7 @@ #include #include #include +#include #include diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index b425748f02d7..37432088fe6d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -86,6 +86,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 04a6fe3e95a2..7b9bb28e2ee9 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -58,6 +58,7 @@ #endif #include +#include #include #include #include diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1b18ce66e7b7..73bfcae8af9c 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -9,6 +9,7 @@ #include #include #include +#include #include /* decapsulation data for use when post-processing */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 19b1b984d687..18f5e509281a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 7ea0209cb169..e2890ec8159e 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 0b298bbc1518..0dd4d06e456d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 6d2a6ac070e3..ef4724de7350 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 92e23b2ad4d2..be5a519cd2f8 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 4a195c724f01..34758118c10c 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -91,6 +91,8 @@ #include #include #include + +#include #include #include #include diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 473d0f2b2e0d..e45846ae570b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -128,6 +128,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index dbe12da8d8b3..d3f6c468faf4 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * Write options to IP header, record destination address to diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index add019c746f8..6986e11d65cc 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index fc718df17b40..d64e2ec8da7b 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -28,6 +28,7 @@ #include #include #include +#include struct ipcomp_tfms { struct list_head list; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index e8674baaa8d9..bb3613ec448c 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 302b7eb507c9..caa3b7d2e48a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 2a3a8c59c655..d35cea31cb55 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -24,7 +24,9 @@ * */ +#include #include +#include #include #include /* for proc_net_* */ #include diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 9bdcf31b760e..fe2c39d2a002 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -35,6 +35,7 @@ #include #include +#include #include #include diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index f3bc320dce93..9fee19c4c617 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c @@ -37,8 +37,10 @@ * */ +#include #include #include +#include #include diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c index 67b3e2fc1fa1..e7004741ac73 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/ipv4/ipvs/ip_vs_est.c @@ -13,7 +13,10 @@ * Changes: * */ +#include #include +#include +#include #include #include diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index b6dad7e3710c..6e5cb92a5c83 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -41,8 +41,10 @@ * me to write this module. */ +#include #include #include +#include /* for sysctl */ #include diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 8c78ef76c121..32ba37ba72d8 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -39,8 +39,10 @@ * */ +#include #include #include +#include /* for sysctl */ #include diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c index 453e94a0bbd7..8b0505b09317 100644 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ b/net/ipv4/ipvs/ip_vs_proto_ah.c @@ -12,6 +12,8 @@ * */ +#include +#include #include #include #include diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c index 478e5c7c7e8e..c36ccf057a19 100644 --- a/net/ipv4/ipvs/ip_vs_proto_esp.c +++ b/net/ipv4/ipvs/ip_vs_proto_esp.c @@ -12,6 +12,8 @@ * */ +#include +#include #include #include #include diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 8ae5f2e0aefa..89d9175d8f28 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -15,8 +15,11 @@ * */ +#include +#include #include #include +#include #include diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index 6f7c50e44a39..7775e6cc68be 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c @@ -34,8 +34,10 @@ * */ +#include #include #include +#include #include diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 2e5ced3d8062..1bca714bda3d 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -21,12 +21,14 @@ #include #include +#include #include #include #include #include #include #include /* for ip_mc_join_group */ +#include #include #include diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index e52847fa10f5..0366eedb4d70 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -18,11 +18,13 @@ * */ +#include #include #include #include #include #include +#include #include #include diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 744abb9d377a..57956dee60c8 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -31,6 +31,7 @@ #include #include #include +#include static DEFINE_RWLOCK(ip_ct_gre_lock); #define ASSERT_READ_LOCK(x) diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index f2dcac7c7660..46becbe4fe58 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index dd476b191f4b..a88bcc551244 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -27,6 +27,7 @@ #endif #include #include +#include #define ASSERT_READ_LOCK(x) #define ASSERT_WRITE_LOCK(x) diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index 8acb7ed40b47..4f95d477805c 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -44,6 +44,7 @@ * */ #include +#include #include #include #include @@ -53,6 +54,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 275a174c6fe6..27860510ca6d 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -18,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/ipt_physdev.c b/net/ipv4/netfilter/ipt_physdev.c index 1a53924041fc..03f554857a4d 100644 --- a/net/ipv4/netfilter/ipt_physdev.c +++ b/net/ipv4/netfilter/ipt_physdev.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 0d7dc668db46..39d49dc333a7 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index dbf82955aabe..16984d4a8a06 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 67c036384e77..223abaa72bc5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -86,6 +86,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index f3629730eb15..13cc7f895583 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 8bfbe9970793..6de8ee1a5ad9 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -36,6 +36,7 @@ #include #include #include +#include #include static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 55917fb17094..626dd39685f2 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index dd80020d8740..ea43ef1d94a7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -15,6 +15,7 @@ * - new extension header parser code */ #include +#include #include #include #include diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 0cd1d1bd9033..ae4653bfd654 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index dde37793d20b..268918d5deea 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c index 24bc0cde43a1..65937de1b58c 100644 --- a/net/ipv6/netfilter/ip6t_esp.c +++ b/net/ipv6/netfilter/ip6t_esp.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 6cf0342706b5..c4a2a8c4c339 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d1b0747a5b9d..de693b43c8ea 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From 8639a11e23d9eb0a6ceac2feed27acdfbb158f95 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 27 Dec 2005 15:17:57 -0200 Subject: [TCP]: Don't use __constant_htonl for a non const arg Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/tcp.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 176221cd0cce..369930497401 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -994,11 +994,11 @@ static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; int this_sack; - *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_SACK << 8) | - (TCPOLEN_SACK_BASE + - (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK))); + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_SACK << 8) | + (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * + TCPOLEN_SACK_PERBLOCK))); for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { *ptr++ = htonl(sp[this_sack].start_seq); *ptr++ = htonl(sp[this_sack].end_seq); -- cgit v1.2.3-70-g09d2 From 17ba15fb6264f27374bc87f4c3f8519b80289d85 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Dec 2005 20:57:40 -0800 Subject: [PPPOX]: Fix assignment into const proto_ops. And actually, with this, the whole pppox layer can basically be removed and subsumed into pppoe.c, no other pppox sub-protocol implementation exists and we've had this thing for at least 4 years. Signed-off-by: David S. Miller --- drivers/net/pppoe.c | 9 ++++----- drivers/net/pppox.c | 10 +++------- include/linux/if_pppox.h | 3 +-- 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index 71e303b28646..9369f811075d 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -85,7 +85,7 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb); static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb); -static struct proto_ops pppoe_ops; +static const struct proto_ops pppoe_ops; static DEFINE_RWLOCK(pppoe_hash_lock); static struct ppp_channel_ops pppoe_chan_ops; @@ -1063,9 +1063,7 @@ static int __init pppoe_proc_init(void) static inline int pppoe_proc_init(void) { return 0; } #endif /* CONFIG_PROC_FS */ -/* ->ioctl are set at pppox_create */ - -static struct proto_ops pppoe_ops = { +static const struct proto_ops pppoe_ops = { .family = AF_PPPOX, .owner = THIS_MODULE, .release = pppoe_release, @@ -1081,7 +1079,8 @@ static struct proto_ops pppoe_ops = { .getsockopt = sock_no_getsockopt, .sendmsg = pppoe_sendmsg, .recvmsg = pppoe_recvmsg, - .mmap = sock_no_mmap + .mmap = sock_no_mmap, + .ioctl = pppox_ioctl, }; static struct pppox_proto pppoe_proto = { diff --git a/drivers/net/pppox.c b/drivers/net/pppox.c index 0c1e114527fb..9315046b3f55 100644 --- a/drivers/net/pppox.c +++ b/drivers/net/pppox.c @@ -68,8 +68,7 @@ EXPORT_SYMBOL(register_pppox_proto); EXPORT_SYMBOL(unregister_pppox_proto); EXPORT_SYMBOL(pppox_unbind_sock); -static int pppox_ioctl(struct socket* sock, unsigned int cmd, - unsigned long arg) +int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; struct pppox_sock *po = pppox_sk(sk); @@ -105,6 +104,7 @@ static int pppox_ioctl(struct socket* sock, unsigned int cmd, return rc; } +EXPORT_SYMBOL(pppox_ioctl); static int pppox_create(struct socket *sock, int protocol) { @@ -119,11 +119,7 @@ static int pppox_create(struct socket *sock, int protocol) goto out; rc = pppox_protos[protocol]->create(sock); - if (!rc) { - /* We get to set the ioctl handler. */ - /* For everything else, pppox is just a shell. */ - sock->ops->ioctl = pppox_ioctl; - } + module_put(pppox_protos[protocol]->owner); out: return rc; diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index e677f73f13dd..4fab3d0a4bce 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -157,8 +157,7 @@ struct pppox_proto { extern int register_pppox_proto(int proto_num, struct pppox_proto *pp); extern void unregister_pppox_proto(int proto_num); extern void pppox_unbind_sock(struct sock *sk);/* delete ppp-channel binding */ -extern int pppox_channel_ioctl(struct ppp_channel *pc, unsigned int cmd, - unsigned long arg); +extern int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); /* PPPoX socket states */ enum { -- cgit v1.2.3-70-g09d2 From 4947d3ef8de7b4f42aed6ea9ba689dc8fb45b5a5 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Tue, 3 Jan 2006 14:06:50 -0800 Subject: [NET]: Speed up __alloc_skb() From: Benjamin LaHaise In __alloc_skb(), the use of skb_shinfo() which casts a u8 * to the shared info structure results in gcc being forced to do a reload of the pointer since it has no information on possible aliasing. Fix this by using a pointer to refer to skb_shared_info. By initializing skb_shared_info sequentially, the write combining buffers can reduce the number of memory transactions to a single write. Reorder the initialization in __alloc_skb() to match the structure definition. There is also an alignment issue on 64 bit systems with skb_shared_info by converting nr_frags to a short everything packs up nicely. Also, pass the slab cache pointer according to the fclone flag instead of using two almost identical function calls. This raises bw_unix performance up to a peak of 707KB/s when combined with the spinlock patch. It should help other networking protocols, too. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- net/core/skbuff.c | 27 +++++++++++++-------------- 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 971677178e0c..483cfc47ec34 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -133,7 +133,7 @@ struct skb_frag_struct { */ struct skb_shared_info { atomic_t dataref; - unsigned int nr_frags; + unsigned short nr_frags; unsigned short tso_size; unsigned short tso_segs; unsigned short ufo_size; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 83fee37de38e..070f91cfde59 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -135,17 +135,13 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, int fclone) { + struct skb_shared_info *shinfo; struct sk_buff *skb; u8 *data; /* Get the HEAD */ - if (fclone) - skb = kmem_cache_alloc(skbuff_fclone_cache, - gfp_mask & ~__GFP_DMA); - else - skb = kmem_cache_alloc(skbuff_head_cache, - gfp_mask & ~__GFP_DMA); - + skb = kmem_cache_alloc(fclone ? skbuff_fclone_cache : skbuff_head_cache, + gfp_mask & ~__GFP_DMA); if (!skb) goto out; @@ -162,6 +158,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->data = data; skb->tail = data; skb->end = data + size; + /* make sure we initialize shinfo sequentially */ + shinfo = skb_shinfo(skb); + atomic_set(&shinfo->dataref, 1); + shinfo->nr_frags = 0; + shinfo->tso_size = 0; + shinfo->tso_segs = 0; + shinfo->ufo_size = 0; + shinfo->ip6_frag_id = 0; + shinfo->frag_list = NULL; + if (fclone) { struct sk_buff *child = skb + 1; atomic_t *fclone_ref = (atomic_t *) (child + 1); @@ -171,13 +177,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, child->fclone = SKB_FCLONE_UNAVAILABLE; } - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->tso_size = 0; - skb_shinfo(skb)->tso_segs = 0; - skb_shinfo(skb)->frag_list = NULL; - skb_shinfo(skb)->ufo_size = 0; - skb_shinfo(skb)->ip6_frag_id = 0; out: return skb; nodata: -- cgit v1.2.3-70-g09d2 From fd19f329a32bdc4eb07885e0b3889567cfe00aa7 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Tue, 3 Jan 2006 14:10:46 -0800 Subject: [AF_UNIX]: Convert to use a spinlock instead of rwlock From: Benjamin LaHaise In af_unix, a rwlock is used to protect internal state. At least on my P4 with HT it is faster to use a spinlock due to the simpler memory barrier used to unlock. This patch raises bw_unix to ~690K/s. Signed-off-by: David S. Miller --- include/net/af_unix.h | 10 +++++----- net/unix/af_unix.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 3f302ae98c03..bfc1779fc753 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -58,10 +58,10 @@ struct unix_skb_parms { #define UNIXCB(skb) (*(struct unix_skb_parms*)&((skb)->cb)) #define UNIXCREDS(skb) (&UNIXCB((skb)).creds) -#define unix_state_rlock(s) read_lock(&unix_sk(s)->lock) -#define unix_state_runlock(s) read_unlock(&unix_sk(s)->lock) -#define unix_state_wlock(s) write_lock(&unix_sk(s)->lock) -#define unix_state_wunlock(s) write_unlock(&unix_sk(s)->lock) +#define unix_state_rlock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_runlock(s) spin_unlock(&unix_sk(s)->lock) +#define unix_state_wlock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_wunlock(s) spin_unlock(&unix_sk(s)->lock) #ifdef __KERNEL__ /* The AF_UNIX socket */ @@ -76,7 +76,7 @@ struct unix_sock { struct sock *other; struct sock *gc_tree; atomic_t inflight; - rwlock_t lock; + spinlock_t lock; wait_queue_head_t peer_wait; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7d3fe6aebcdb..1ddd36d50091 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -564,7 +564,7 @@ static struct sock * unix_create1(struct socket *sock) u = unix_sk(sk); u->dentry = NULL; u->mnt = NULL; - rwlock_init(&u->lock); + spin_lock_init(&u->lock); atomic_set(&u->inflight, sock ? 0 : -1); init_MUTEX(&u->readsem); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); -- cgit v1.2.3-70-g09d2 From b461d2f2188c1c578ed651e4cdf608be7a993cd4 Mon Sep 17 00:00:00 2001 From: Per Liden Date: Tue, 3 Jan 2006 14:13:29 -0800 Subject: [NETLINK] genetlink: fix cmd type in genl_ops to be consistent to u8 Signed-off-by: Per Liden ACKed-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/genetlink.h | 2 +- net/netlink/genetlink.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 52d8b1a73d52..c5b96b2b8155 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -60,7 +60,7 @@ struct genl_info */ struct genl_ops { - unsigned int cmd; + u8 cmd; unsigned int flags; struct nla_policy *policy; int (*doit)(struct sk_buff *skb, diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 287cfcc56951..3b1378498d50 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -441,7 +441,7 @@ errout: } static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, - int seq, int cmd) + int seq, u8 cmd) { struct sk_buff *skb; int err; -- cgit v1.2.3-70-g09d2 From 88df8ef59a3eb54b1e2412765ff2736d2376d1ca Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 3 Jan 2006 15:25:45 -0800 Subject: [NET]: Don't exclude broadcast addresses from is_multicast_ether_addr() The check for multicast shouldn't exclude broadcast type addresses. This reverts the incorrect change done in 2.6.13. The broadcast address is a multicast address and should be excluded from being a valid_ether_address for use in bridging or device address. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 5f49a30eb6f2..745c988359c0 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -63,10 +63,11 @@ static inline int is_zero_ether_addr(const u8 *addr) * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is a multicast address. + * By definition the broadcast address is also a multicast address. */ static inline int is_multicast_ether_addr(const u8 *addr) { - return ((addr[0] != 0xff) && (0x01 & addr[0])); + return (0x01 & addr[0]); } /** -- cgit v1.2.3-70-g09d2 From 40efc6fa179f440a008333ea98f701bc35a1f97f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 3 Jan 2006 16:03:49 -0800 Subject: [TCP]: less inline's TCP inline usage cleanup: * get rid of inline in several places * replace __inline__ with inline where possible * move functions used in one file out of tcp.h * let compiler decide on used once cases On x86_64: text data bss dec hex filename 3594701 648348 567400 4810449 4966d1 vmlinux.orig 3593133 648580 567400 4809113 496199 vmlinux On sparc64: text data bss dec hex filename 2538278 406152 530392 3474822 350586 vmlinux.ORIG 2536382 406384 530392 3473158 34ff06 vmlinux Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 193 +++++++------------------------------------------- net/ipv4/tcp_cong.c | 28 ++++++++ net/ipv4/tcp_input.c | 82 +++++++++++++++------ net/ipv4/tcp_ipv4.c | 9 ++- net/ipv4/tcp_output.c | 87 ++++++++++++++++++++--- 5 files changed, 198 insertions(+), 201 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 369930497401..77f21c65bbca 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -445,34 +445,16 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); -/* Initialize RCV_MSS value. - * RCV_MSS is an our guess about MSS used by the peer. - * We haven't any direct information about the MSS. - * It's better to underestimate the RCV_MSS rather than overestimate. - * Overestimations make us ACKing less frequently than needed. - * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss(). - */ +extern void tcp_initialize_rcv_mss(struct sock *sk); -static inline void tcp_initialize_rcv_mss(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); - - hint = min(hint, tp->rcv_wnd/2); - hint = min(hint, TCP_MIN_RCVMSS); - hint = max(hint, TCP_MIN_MSS); - - inet_csk(sk)->icsk_ack.rcv_mss = hint; -} - -static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) +static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { tp->pred_flags = htonl((tp->tcp_header_len << 26) | ntohl(TCP_FLAG_ACK) | snd_wnd); } -static __inline__ void tcp_fast_path_on(struct tcp_sock *tp) +static inline void tcp_fast_path_on(struct tcp_sock *tp) { __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); } @@ -490,7 +472,7 @@ static inline void tcp_fast_path_check(struct sock *sk, struct tcp_sock *tp) * Rcv_nxt can be after the window if our peer push more data * than the offered window. */ -static __inline__ u32 tcp_receive_window(const struct tcp_sock *tp) +static inline u32 tcp_receive_window(const struct tcp_sock *tp) { s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt; @@ -662,6 +644,7 @@ extern void tcp_cleanup_congestion_control(struct sock *sk); extern int tcp_set_default_congestion_control(const char *name); extern void tcp_get_default_congestion_control(char *name); extern int tcp_set_congestion_control(struct sock *sk, const char *name); +extern void tcp_slow_start(struct tcp_sock *tp); extern struct tcp_congestion_ops tcp_init_congestion_ops; extern u32 tcp_reno_ssthresh(struct sock *sk); @@ -701,7 +684,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) * "Packets left network, but not honestly ACKed yet" PLUS * "Packets fast retransmitted" */ -static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) +static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) { return (tp->packets_out - tp->left_out + tp->retrans_out); } @@ -721,33 +704,6 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) (tp->snd_cwnd >> 2))); } -/* - * Linear increase during slow start - */ -static inline void tcp_slow_start(struct tcp_sock *tp) -{ - if (sysctl_tcp_abc) { - /* RFC3465: Slow Start - * TCP sender SHOULD increase cwnd by the number of - * previously unacknowledged bytes ACKed by each incoming - * acknowledgment, provided the increase is not more than L - */ - if (tp->bytes_acked < tp->mss_cache) - return; - - /* We MAY increase by 2 if discovered delayed ack */ - if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } - } - tp->bytes_acked = 0; - - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; -} - - static inline void tcp_sync_left_out(struct tcp_sock *tp) { if (tp->rx_opt.sack_ok && @@ -756,34 +712,7 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) tp->left_out = tp->sacked_out + tp->lost_out; } -/* Set slow start threshold and cwnd not falling to slow start */ -static inline void __tcp_enter_cwr(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - - tp->undo_marker = 0; - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp) + 1U); - tp->snd_cwnd_cnt = 0; - tp->high_seq = tp->snd_nxt; - tp->snd_cwnd_stamp = tcp_time_stamp; - TCP_ECN_queue_cwr(tp); -} - -static inline void tcp_enter_cwr(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - tp->prior_ssthresh = 0; - tp->bytes_acked = 0; - if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { - __tcp_enter_cwr(sk); - tcp_set_ca_state(sk, TCP_CA_CWR); - } -} - +extern void tcp_enter_cwr(struct sock *sk); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); /* Slow start with delack produces 3 packets of burst, so that @@ -815,14 +744,14 @@ static inline int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) return left <= tcp_max_burst(tp); } -static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, - const struct sk_buff *skb) +static inline void tcp_minshall_update(struct tcp_sock *tp, int mss, + const struct sk_buff *skb) { if (skb->len < mss) tp->snd_sml = TCP_SKB_CB(skb)->end_seq; } -static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) +static inline void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) { const struct inet_connection_sock *icsk = inet_csk(sk); if (!tp->packets_out && !icsk->icsk_pending) @@ -830,18 +759,18 @@ static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *t icsk->icsk_rto, TCP_RTO_MAX); } -static __inline__ void tcp_push_pending_frames(struct sock *sk, - struct tcp_sock *tp) +static inline void tcp_push_pending_frames(struct sock *sk, + struct tcp_sock *tp) { __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle); } -static __inline__ void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq) +static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq) { tp->snd_wl1 = seq; } -static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) +static inline void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) { tp->snd_wl1 = seq; } @@ -849,19 +778,19 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) /* * Calculate(/check) TCP checksum */ -static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len, - unsigned long saddr, unsigned long daddr, - unsigned long base) +static inline u16 tcp_v4_check(struct tcphdr *th, int len, + unsigned long saddr, unsigned long daddr, + unsigned long base) { return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base); } -static __inline__ int __tcp_checksum_complete(struct sk_buff *skb) +static inline int __tcp_checksum_complete(struct sk_buff *skb) { return __skb_checksum_complete(skb); } -static __inline__ int tcp_checksum_complete(struct sk_buff *skb) +static inline int tcp_checksum_complete(struct sk_buff *skb) { return skb->ip_summed != CHECKSUM_UNNECESSARY && __tcp_checksum_complete(skb); @@ -869,7 +798,7 @@ static __inline__ int tcp_checksum_complete(struct sk_buff *skb) /* Prequeue for VJ style copy to user, combined with checksumming. */ -static __inline__ void tcp_prequeue_init(struct tcp_sock *tp) +static inline void tcp_prequeue_init(struct tcp_sock *tp) { tp->ucopy.task = NULL; tp->ucopy.len = 0; @@ -885,7 +814,7 @@ static __inline__ void tcp_prequeue_init(struct tcp_sock *tp) * * NOTE: is this not too big to inline? */ -static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) +static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -926,7 +855,7 @@ static const char *statename[]={ }; #endif -static __inline__ void tcp_set_state(struct sock *sk, int state) +static inline void tcp_set_state(struct sock *sk, int state) { int oldstate = sk->sk_state; @@ -960,7 +889,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) #endif } -static __inline__ void tcp_done(struct sock *sk) +static inline void tcp_done(struct sock *sk) { tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); @@ -973,81 +902,13 @@ static __inline__ void tcp_done(struct sock *sk) inet_csk_destroy_sock(sk); } -static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) +static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { rx_opt->dsack = 0; rx_opt->eff_sacks = 0; rx_opt->num_sacks = 0; } -static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, __u32 tstamp) -{ - if (tp->rx_opt.tstamp_ok) { - *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); - *ptr++ = htonl(tstamp); - *ptr++ = htonl(tp->rx_opt.ts_recent); - } - if (tp->rx_opt.eff_sacks) { - struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; - int this_sack; - - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_SACK << 8) | - (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * - TCPOLEN_SACK_PERBLOCK))); - for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { - *ptr++ = htonl(sp[this_sack].start_seq); - *ptr++ = htonl(sp[this_sack].end_seq); - } - if (tp->rx_opt.dsack) { - tp->rx_opt.dsack = 0; - tp->rx_opt.eff_sacks--; - } - } -} - -/* Construct a tcp options header for a SYN or SYN_ACK packet. - * If this is every changed make sure to change the definition of - * MAX_SYN_SIZE to match the new maximum number of options that you - * can generate. - */ -static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack, - int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent) -{ - /* We always get an MSS option. - * The option bytes which will be seen in normal data - * packets should timestamps be used, must be in the MSS - * advertised. But we subtract them from tp->mss_cache so - * that calculations in tcp_sendmsg are simpler etc. - * So account for this fact here if necessary. If we - * don't do this correctly, as a receiver we won't - * recognize data packets as being full sized when we - * should, and thus we won't abide by the delayed ACK - * rules correctly. - * SACKs don't matter, we never delay an ACK when we - * have any of those going out. - */ - *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); - if (ts) { - if(sack) - *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | - (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - else - *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - *ptr++ = htonl(tstamp); /* TSVAL */ - *ptr++ = htonl(ts_recent); /* TSECR */ - } else if(sack) - *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); - if (offer_wscale) - *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale)); -} - /* Determine a window scaling and initial window to offer. */ extern void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, @@ -1072,9 +933,9 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } -static __inline__ void tcp_openreq_init(struct request_sock *req, - struct tcp_options_received *rx_opt, - struct sk_buff *skb) +static inline void tcp_openreq_init(struct request_sock *req, + struct tcp_options_received *rx_opt, + struct sk_buff *skb) { struct inet_request_sock *ireq = inet_rsk(req); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index c7cc62c8dc12..e688c687d62d 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -174,6 +174,34 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) return err; } + +/* + * Linear increase during slow start + */ +void tcp_slow_start(struct tcp_sock *tp) +{ + if (sysctl_tcp_abc) { + /* RFC3465: Slow Start + * TCP sender SHOULD increase cwnd by the number of + * previously unacknowledged bytes ACKed by each incoming + * acknowledgment, provided the increase is not more than L + */ + if (tp->bytes_acked < tp->mss_cache) + return; + + /* We MAY increase by 2 if discovered delayed ack */ + if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) { + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + } + } + tp->bytes_acked = 0; + + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; +} +EXPORT_SYMBOL_GPL(tcp_slow_start); + /* * TCP Reno congestion control * This is special case used for fallback as well. diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 981d1203b152..0a461232329f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -115,8 +115,8 @@ int sysctl_tcp_abc = 1; /* Adapt the MSS value used to make delayed ack decision to the * real world. */ -static inline void tcp_measure_rcv_mss(struct sock *sk, - const struct sk_buff *skb) +static void tcp_measure_rcv_mss(struct sock *sk, + const struct sk_buff *skb) { struct inet_connection_sock *icsk = inet_csk(sk); const unsigned int lss = icsk->icsk_ack.last_seg_size; @@ -246,8 +246,8 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, return 0; } -static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, - struct sk_buff *skb) +static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, + struct sk_buff *skb) { /* Check #1 */ if (tp->rcv_ssthresh < tp->window_clamp && @@ -341,6 +341,26 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); } + +/* Initialize RCV_MSS value. + * RCV_MSS is an our guess about MSS used by the peer. + * We haven't any direct information about the MSS. + * It's better to underestimate the RCV_MSS rather than overestimate. + * Overestimations make us ACKing less frequently than needed. + * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss(). + */ +void tcp_initialize_rcv_mss(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); + + hint = min(hint, tp->rcv_wnd/2); + hint = min(hint, TCP_MIN_RCVMSS); + hint = max(hint, TCP_MIN_MSS); + + inet_csk(sk)->icsk_ack.rcv_mss = hint; +} + /* Receiver "autotuning" code. * * The algorithm for RTT estimation w/o timestamps is based on @@ -735,6 +755,27 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } +/* Set slow start threshold and cwnd not falling to slow start */ +void tcp_enter_cwr(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->prior_ssthresh = 0; + tp->bytes_acked = 0; + if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { + tp->undo_marker = 0; + tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); + tp->snd_cwnd = min(tp->snd_cwnd, + tcp_packets_in_flight(tp) + 1U); + tp->snd_cwnd_cnt = 0; + tp->high_seq = tp->snd_nxt; + tp->snd_cwnd_stamp = tcp_time_stamp; + TCP_ECN_queue_cwr(tp); + + tcp_set_ca_state(sk, TCP_CA_CWR); + } +} + /* Initialize metrics on socket. */ static void tcp_init_metrics(struct sock *sk) @@ -2070,8 +2111,8 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, tcp_ack_no_tstamp(sk, seq_rtt, flag); } -static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, - u32 in_flight, int good) +static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, + u32 in_flight, int good) { const struct inet_connection_sock *icsk = inet_csk(sk); icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); @@ -2082,7 +2123,7 @@ static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, * RFC2988 recommends to restart timer to now+rto. */ -static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) +static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) { if (!tp->packets_out) { inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); @@ -2147,7 +2188,7 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, return acked; } -static inline u32 tcp_usrtt(const struct sk_buff *skb) +static u32 tcp_usrtt(const struct sk_buff *skb) { struct timeval tv, now; @@ -2583,8 +2624,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, /* Fast parse options. This hopes to only see timestamps. * If it is wrong it falls back on tcp_parse_options(). */ -static inline int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, - struct tcp_sock *tp) +static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, + struct tcp_sock *tp) { if (th->doff == sizeof(struct tcphdr)>>2) { tp->rx_opt.saw_tstamp = 0; @@ -2804,8 +2845,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) } } -static __inline__ int -tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) +static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) { if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { if (before(seq, sp->start_seq)) @@ -2817,7 +2857,7 @@ tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) return 0; } -static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) +static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) { if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { if (before(seq, tp->rcv_nxt)) @@ -2832,7 +2872,7 @@ static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) } } -static inline void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq) +static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq) { if (!tp->rx_opt.dsack) tcp_dsack_set(tp, seq, end_seq); @@ -2890,7 +2930,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) } } -static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) +static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) { __u32 tmp; @@ -3455,7 +3495,7 @@ void tcp_cwnd_application_limited(struct sock *sk) tp->snd_cwnd_stamp = tcp_time_stamp; } -static inline int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp) +static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp) { /* If the user specified a specific send buffer setting, do * not modify it. @@ -3502,7 +3542,7 @@ static void tcp_new_space(struct sock *sk) sk->sk_write_space(sk); } -static inline void tcp_check_space(struct sock *sk) +static void tcp_check_space(struct sock *sk) { if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); @@ -3512,7 +3552,7 @@ static inline void tcp_check_space(struct sock *sk) } } -static __inline__ void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp) +static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp) { tcp_push_pending_frames(sk, tp); tcp_check_space(sk); @@ -3544,7 +3584,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) } } -static __inline__ void tcp_ack_snd_check(struct sock *sk) +static inline void tcp_ack_snd_check(struct sock *sk) { if (!inet_csk_ack_scheduled(sk)) { /* We sent a data segment already. */ @@ -3692,8 +3732,7 @@ static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) return result; } -static __inline__ int -tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) +static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) { return skb->ip_summed != CHECKSUM_UNNECESSARY && __tcp_checksum_complete_user(sk, skb); @@ -4474,3 +4513,4 @@ EXPORT_SYMBOL(sysctl_tcp_abc); EXPORT_SYMBOL(tcp_parse_options); EXPORT_SYMBOL(tcp_rcv_established); EXPORT_SYMBOL(tcp_rcv_state_process); +EXPORT_SYMBOL(tcp_initialize_rcv_mss); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9b62d80bb20f..5c70493dff02 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -270,8 +270,7 @@ failure: /* * This routine does path mtu discovery as defined in RFC1191. */ -static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, - u32 mtu) +static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) { struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); @@ -662,7 +661,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -static inline void syn_flood_warning(struct sk_buff *skb) +static void syn_flood_warning(struct sk_buff *skb) { static unsigned long warntime; @@ -677,8 +676,8 @@ static inline void syn_flood_warning(struct sk_buff *skb) /* * Save and compile IPv4 options into the request_sock if needed. */ -static inline struct ip_options *tcp_v4_save_options(struct sock *sk, - struct sk_buff *skb) +static struct ip_options *tcp_v4_save_options(struct sock *sk, + struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); struct ip_options *dopt = NULL; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3a0a914de917..a7623ead39a8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -51,8 +51,8 @@ int sysctl_tcp_retrans_collapse = 1; */ int sysctl_tcp_tso_win_divisor = 3; -static inline void update_send_head(struct sock *sk, struct tcp_sock *tp, - struct sk_buff *skb) +static void update_send_head(struct sock *sk, struct tcp_sock *tp, + struct sk_buff *skb) { sk->sk_send_head = skb->next; if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) @@ -124,8 +124,8 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) tp->snd_cwnd_used = 0; } -static inline void tcp_event_data_sent(struct tcp_sock *tp, - struct sk_buff *skb, struct sock *sk) +static void tcp_event_data_sent(struct tcp_sock *tp, + struct sk_buff *skb, struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; @@ -142,7 +142,7 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp, icsk->icsk_ack.pingpong = 1; } -static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) +static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) { tcp_dec_quickack_mode(sk, pkts); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); @@ -212,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss, * value can be stuffed directly into th->window for an outgoing * frame. */ -static __inline__ u16 tcp_select_window(struct sock *sk) +static u16 tcp_select_window(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); u32 cur_win = tcp_receive_window(tp); @@ -250,6 +250,75 @@ static __inline__ u16 tcp_select_window(struct sock *sk) return new_win; } +static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, + __u32 tstamp) +{ + if (tp->rx_opt.tstamp_ok) { + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + *ptr++ = htonl(tstamp); + *ptr++ = htonl(tp->rx_opt.ts_recent); + } + if (tp->rx_opt.eff_sacks) { + struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; + int this_sack; + + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_SACK << 8) | + (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * + TCPOLEN_SACK_PERBLOCK))); + for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { + *ptr++ = htonl(sp[this_sack].start_seq); + *ptr++ = htonl(sp[this_sack].end_seq); + } + if (tp->rx_opt.dsack) { + tp->rx_opt.dsack = 0; + tp->rx_opt.eff_sacks--; + } + } +} + +/* Construct a tcp options header for a SYN or SYN_ACK packet. + * If this is every changed make sure to change the definition of + * MAX_SYN_SIZE to match the new maximum number of options that you + * can generate. + */ +static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack, + int offer_wscale, int wscale, __u32 tstamp, + __u32 ts_recent) +{ + /* We always get an MSS option. + * The option bytes which will be seen in normal data + * packets should timestamps be used, must be in the MSS + * advertised. But we subtract them from tp->mss_cache so + * that calculations in tcp_sendmsg are simpler etc. + * So account for this fact here if necessary. If we + * don't do this correctly, as a receiver we won't + * recognize data packets as being full sized when we + * should, and thus we won't abide by the delayed ACK + * rules correctly. + * SACKs don't matter, we never delay an ACK when we + * have any of those going out. + */ + *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); + if (ts) { + if(sack) + *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + else + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + *ptr++ = htonl(tstamp); /* TSVAL */ + *ptr++ = htonl(ts_recent); /* TSECR */ + } else if(sack) + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); + if (offer_wscale) + *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale)); +} /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial @@ -724,7 +793,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) /* Congestion window validation. (RFC2861) */ -static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) +static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) { __u32 packets_out = tp->packets_out; @@ -773,7 +842,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk /* This must be invoked the first time we consider transmitting * SKB onto the wire. */ -static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) +static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { int tso_segs = tcp_skb_pcount(skb); @@ -1794,7 +1863,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, /* * Do all connect socket setups that can be done AF independent. */ -static inline void tcp_connect_init(struct sock *sk) +static void tcp_connect_init(struct sock *sk) { struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); -- cgit v1.2.3-70-g09d2