From 183f732c3f3f307d5673e17b69de6894e1dd2918 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 6 Dec 2010 15:56:17 +0200 Subject: Bluetooth: Fix initial RFCOMM DLC security level Due to commit 63ce0900 connections initiated through TTYs created with "rfcomm bind ..." would have security level BT_SECURITY_SDP instead of BT_SECURITY_LOW. This would cause instant connection failure between any two SSP capable devices due to the L2CAP connect request to RFCOMM being sent before authentication has been performed. This patch fixes the regression by always initializing the DLC security level to BT_SECURITY_LOW. Signed-off-by: Johan Hedberg Acked-by: Luiz Augusto von Dentz Signed-off-by: Gustavo F. Padovan --- net/bluetooth/rfcomm/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index fa642aa652bd..432a9a633e8d 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -311,6 +311,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d) d->state = BT_OPEN; d->flags = 0; d->mscex = 0; + d->sec_level = BT_SECURITY_LOW; d->mtu = RFCOMM_DEFAULT_MTU; d->v24_sig = RFCOMM_V24_RTC | RFCOMM_V24_RTR | RFCOMM_V24_DV; -- cgit v1.2.3-70-g09d2 From c926d006c1514cfb3572893f41f2324e96823661 Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Thu, 9 Dec 2010 10:43:13 -0800 Subject: mac80211: Fix NULL-pointer deference on ibss merge when not ready dev_open will eventually call ieee80211_ibss_join which sets up the skb used for beacons/probe-responses however it is possible to receive beacons that attempt to merge before this occurs causing a null pointer dereference. Check ssid_len as that is the last thing set in ieee80211_ibss_join. This occurs quite easily in the presence of adhoc nodes with hidden SSID's revised previous patch to check further up based on irc feedback Signed-off-by: Tim Harvey Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 239c4836a946..077a93dd1671 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -780,6 +780,9 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mutex_lock(&sdata->u.ibss.mtx); + if (!sdata->u.ibss.ssid_len) + goto mgmt_out; /* not ready to merge yet */ + switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_PROBE_REQ: ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len); @@ -797,6 +800,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; } + mgmt_out: mutex_unlock(&sdata->u.ibss.mtx); } -- cgit v1.2.3-70-g09d2 From 8808f64171deec62346888e156e3adb636e2a31a Mon Sep 17 00:00:00 2001 From: Herton Ronaldo Krzesinski Date: Mon, 13 Dec 2010 11:43:51 -0200 Subject: mac80211: avoid calling ieee80211_work_work unconditionally On suspend, there might be usb wireless drivers which wrongly trigger the warning in ieee80211_work_work. If an usb driver doesn't have a suspend hook, the usb stack will disconnect the device. On disconnect, a mac80211 driver calls ieee80211_unregister_hw, which calls dev_close, which calls ieee80211_stop, and in the end calls ieee80211_work_purge-> ieee80211_work_work. The problem is that this call to ieee80211_work_purge comes after mac80211 is suspended, triggering the warning even when we don't have work queued in work_list (the expected case when already suspended), because it always calls ieee80211_work_work. So, just call ieee80211_work_work in ieee80211_work_purge if we really have to abort work. This addresses the warning reported at https://bugzilla.kernel.org/show_bug.cgi?id=24402 Signed-off-by: Herton Ronaldo Krzesinski Signed-off-by: John W. Linville --- net/mac80211/work.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/work.c b/net/mac80211/work.c index ae344d1ba056..146097cb43a7 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -1051,11 +1051,13 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_work *wk; + bool cleanup = false; mutex_lock(&local->mtx); list_for_each_entry(wk, &local->work_list, list) { if (wk->sdata != sdata) continue; + cleanup = true; wk->type = IEEE80211_WORK_ABORT; wk->started = true; wk->timeout = jiffies; @@ -1063,7 +1065,8 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->mtx); /* run cleanups etc. */ - ieee80211_work_work(&local->work_work); + if (cleanup) + ieee80211_work_work(&local->work_work); mutex_lock(&local->mtx); list_for_each_entry(wk, &local->work_list, list) { -- cgit v1.2.3-70-g09d2 From d96c9043d1588f04c7f467167f653c07d83232d5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 13 Dec 2010 20:30:28 -0800 Subject: ceph: fix msgr_init error path create_workqueue() returns NULL on failure. Signed-off-by: Sage Weil --- net/ceph/messenger.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 1c7a2ec4f3cc..b6ff4a1519ab 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -97,11 +97,9 @@ struct workqueue_struct *ceph_msgr_wq; int ceph_msgr_init(void) { ceph_msgr_wq = create_workqueue("ceph-msgr"); - if (IS_ERR(ceph_msgr_wq)) { - int ret = PTR_ERR(ceph_msgr_wq); - pr_err("msgr_init failed to create workqueue: %d\n", ret); - ceph_msgr_wq = NULL; - return ret; + if (!ceph_msgr_wq) { + pr_err("msgr_init failed to create workqueue\n"); + return -ENOMEM; } return 0; } -- cgit v1.2.3-70-g09d2 From d3052b557a1c94c21f50465702fa886753ce6b43 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Sat, 11 Dec 2010 15:20:11 +0000 Subject: ipv6: delete expired route in ip6_pmtu_deliver The first big packets sent to a "low-MTU" client correctly triggers the creation of a temporary route containing the reduced MTU. But after the temporary route has expired, new ICMP6 "packet too big" will be sent, rt6_pmtu_discovery will find the previous EXPIRED route check that its mtu isn't bigger then in icmp packet and do nothing before the temporary route will not deleted by gc. I make the simple experiment: while :; do time ( dd if=/dev/zero bs=10K count=1 | ssh hostname dd of=/dev/null ) || break; done The "time" reports real 0m0.197s if a temporary route isn't expired, but it reports real 0m52.837s (!!!!) immediately after a temporare route has expired. Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller --- net/ipv6/route.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 96455ffb76fb..7659d6f16e6b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1565,11 +1565,16 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, { struct rt6_info *rt, *nrt; int allfrag = 0; - +again: rt = rt6_lookup(net, daddr, saddr, ifindex, 0); if (rt == NULL) return; + if (rt6_check_expired(rt)) { + ip6_del_rt(rt); + goto again; + } + if (pmtu >= dst_mtu(&rt->dst)) goto out; -- cgit v1.2.3-70-g09d2 From fcbdf09d9652c8919dcf47072e3ae7dcb4eb98ac Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Thu, 16 Dec 2010 14:26:56 -0800 Subject: net: fix nulls list corruptions in sk_prot_alloc Special care is taken inside sk_port_alloc to avoid overwriting skc_node/skc_nulls_node. We should also avoid overwriting skc_bind_node/skc_portaddr_node. The patch fixes the following crash: BUG: unable to handle kernel paging request at fffffffffffffff0 IP: [] udp4_lib_lookup2+0xad/0x370 [] __udp4_lib_lookup+0x282/0x360 [] __udp4_lib_rcv+0x31e/0x700 [] ? ip_local_deliver_finish+0x65/0x190 [] ? ip_local_deliver+0x88/0xa0 [] udp_rcv+0x15/0x20 [] ip_local_deliver_finish+0x65/0x190 [] ip_local_deliver+0x88/0xa0 [] ip_rcv_finish+0x32d/0x6f0 [] ? netif_receive_skb+0x99c/0x11c0 [] ip_rcv+0x2bb/0x350 [] netif_receive_skb+0x99c/0x11c0 Signed-off-by: Leonard Crestez Signed-off-by: Octavian Purdila Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 3 +++ net/core/sock.c | 47 +++++++++++++++++++++++++++++++++++------------ net/ipv4/udp.c | 1 + net/ipv4/udplite.c | 1 + net/ipv6/udp.c | 1 + net/ipv6/udplite.c | 1 + 6 files changed, 42 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index 659d968d95c5..7d3f7ce239b5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -754,6 +754,7 @@ struct proto { void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); + void (*clear_sk)(struct sock *sk, int size); /* Keeping track of sockets in use */ #ifdef CONFIG_PROC_FS @@ -852,6 +853,8 @@ static inline void __sk_prot_rehash(struct sock *sk) sk->sk_prot->hash(sk); } +void sk_prot_clear_portaddr_nulls(struct sock *sk, int size); + /* About 10 seconds */ #define SOCK_DESTROY_TIME (10*HZ) diff --git a/net/core/sock.c b/net/core/sock.c index fb6080111461..e5af8d5d5b50 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1009,6 +1009,36 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) #endif } +/* + * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes + * un-modified. Special care is taken when initializing object to zero. + */ +static inline void sk_prot_clear_nulls(struct sock *sk, int size) +{ + if (offsetof(struct sock, sk_node.next) != 0) + memset(sk, 0, offsetof(struct sock, sk_node.next)); + memset(&sk->sk_node.pprev, 0, + size - offsetof(struct sock, sk_node.pprev)); +} + +void sk_prot_clear_portaddr_nulls(struct sock *sk, int size) +{ + unsigned long nulls1, nulls2; + + nulls1 = offsetof(struct sock, __sk_common.skc_node.next); + nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next); + if (nulls1 > nulls2) + swap(nulls1, nulls2); + + if (nulls1 != 0) + memset((char *)sk, 0, nulls1); + memset((char *)sk + nulls1 + sizeof(void *), 0, + nulls2 - nulls1 - sizeof(void *)); + memset((char *)sk + nulls2 + sizeof(void *), 0, + size - nulls2 - sizeof(void *)); +} +EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls); + static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, int family) { @@ -1021,19 +1051,12 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (!sk) return sk; if (priority & __GFP_ZERO) { - /* - * caches using SLAB_DESTROY_BY_RCU should let - * sk_node.next un-modified. Special care is taken - * when initializing object to zero. - */ - if (offsetof(struct sock, sk_node.next) != 0) - memset(sk, 0, offsetof(struct sock, sk_node.next)); - memset(&sk->sk_node.pprev, 0, - prot->obj_size - offsetof(struct sock, - sk_node.pprev)); + if (prot->clear_sk) + prot->clear_sk(sk, prot->obj_size); + else + sk_prot_clear_nulls(sk, prot->obj_size); } - } - else + } else sk = kmalloc(prot->obj_size, priority); if (sk != NULL) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5e0a3a582a59..2d3ded4d0786 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1899,6 +1899,7 @@ struct proto udp_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; EXPORT_SYMBOL(udp_prot); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index ab76aa928fa9..aee9963f7f5a 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -57,6 +57,7 @@ struct proto udplite_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; EXPORT_SYMBOL(udplite_prot); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 91def93bec85..cd6cb7c3e563 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1477,6 +1477,7 @@ struct proto udpv6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; static struct inet_protosw udpv6_protosw = { diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 5f48fadc27f7..986c4de5292e 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -55,6 +55,7 @@ struct proto udplitev6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; static struct inet_protosw udplite6_protosw = { -- cgit v1.2.3-70-g09d2 From 76d661586c8131453ba75a2e027c1f21511a893a Mon Sep 17 00:00:00 2001 From: David Stevens Date: Tue, 14 Dec 2010 08:42:16 +0000 Subject: bridge: fix IPv6 queries for bridge multicast snooping This patch fixes a missing ntohs() for bridge IPv6 multicast snooping. Signed-off-by: David L Stevens Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index eb5b256ffc88..f19e347f56f6 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -437,7 +437,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, ip6h = ipv6_hdr(skb); *(__force __be32 *)ip6h = htonl(0x60000000); - ip6h->payload_len = 8 + sizeof(*mldq); + ip6h->payload_len = htons(8 + sizeof(*mldq)); ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0); -- cgit v1.2.3-70-g09d2 From 7d743b7e952261f4d9ee091100b6403f3ce8a2af Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 14 Dec 2010 16:10:41 +0000 Subject: sctp: fix the return value of getting the sctp partial delivery point Get the sctp partial delivery point using SCTP_PARTIAL_DELIVERY_POINT socket option should return 0 if success, not -ENOTSUPP. Signed-off-by: Wei Yongjun Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0b9ee34ad35c..fff0926b1111 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5053,7 +5053,7 @@ static int sctp_getsockopt_partial_delivery_point(struct sock *sk, int len, if (copy_to_user(optval, &val, len)) return -EFAULT; - return -ENOTSUPP; + return 0; } /* -- cgit v1.2.3-70-g09d2 From 29ba5fed1bbd09c2cba890798c8f9eaab251401d Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 16 Dec 2010 11:28:12 +0000 Subject: ipv6: don't flush routes when setting loopback down When loopback device is being brought down, then keep the route table entries because they are special. The entries in the local table for linklocal routes and ::1 address should not be purged. This is a sub optimal solution to the problem and should be replaced by a better fix in future. Signed-off-by: Stephen Hemminger Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 93b7a933a775..848b35591042 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2669,7 +2669,9 @@ static int addrconf_ifdown(struct net_device *dev, int how) ASSERT_RTNL(); - rt6_ifdown(net, dev); + /* Flush routes if device is being removed or it is not loopback */ + if (how || !(dev->flags & IFF_LOOPBACK)) + rt6_ifdown(net, dev); neigh_ifdown(&nd_tbl, dev); idev = __in6_dev_get(dev); -- cgit v1.2.3-70-g09d2 From b6aa5901c7a2bd90d0b6b9866300d2648b2568f3 Mon Sep 17 00:00:00 2001 From: Henry C Chang Date: Wed, 15 Dec 2010 20:45:41 -0800 Subject: ceph: mark user pages dirty on direct-io reads For read operation, we have to set the argument _write_ of get_user_pages to 1 since we will write data to pages. Also, we need to SetPageDirty before releasing these pages. Signed-off-by: Henry C Chang Signed-off-by: Sage Weil --- fs/ceph/file.c | 8 ++++---- include/linux/ceph/libceph.h | 6 ++++-- net/ceph/pagevec.c | 11 +++++++---- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e860d8f1bb45..7d0e4a82d898 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -384,7 +384,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, if (file->f_flags & O_DIRECT) { num_pages = calc_pages_for((unsigned long)data, len); - pages = ceph_get_direct_page_vector(data, num_pages); + pages = ceph_get_direct_page_vector(data, num_pages, true); } else { num_pages = calc_pages_for(off, len); pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); @@ -413,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, done: if (file->f_flags & O_DIRECT) - ceph_put_page_vector(pages, num_pages); + ceph_put_page_vector(pages, num_pages, true); else ceph_release_page_vector(pages, num_pages); dout("sync_read result %d\n", ret); @@ -522,7 +522,7 @@ more: return -ENOMEM; if (file->f_flags & O_DIRECT) { - pages = ceph_get_direct_page_vector(data, num_pages); + pages = ceph_get_direct_page_vector(data, num_pages, false); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto out; @@ -572,7 +572,7 @@ more: } if (file->f_flags & O_DIRECT) - ceph_put_page_vector(pages, num_pages); + ceph_put_page_vector(pages, num_pages, false); else if (file->f_flags & O_SYNC) ceph_release_page_vector(pages, num_pages); diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 9e76d35670d2..72c72bfccb88 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -227,8 +227,10 @@ extern int ceph_open_session(struct ceph_client *client); extern void ceph_release_page_vector(struct page **pages, int num_pages); extern struct page **ceph_get_direct_page_vector(const char __user *data, - int num_pages); -extern void ceph_put_page_vector(struct page **pages, int num_pages); + int num_pages, + bool write_page); +extern void ceph_put_page_vector(struct page **pages, int num_pages, + bool dirty); extern void ceph_release_page_vector(struct page **pages, int num_pages); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_copy_user_to_page_vector(struct page **pages, diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index ac34feeb2b3a..01947a5d03b7 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -13,7 +13,7 @@ * build a vector of user pages */ struct page **ceph_get_direct_page_vector(const char __user *data, - int num_pages) + int num_pages, bool write_page) { struct page **pages; int rc; @@ -24,7 +24,7 @@ struct page **ceph_get_direct_page_vector(const char __user *data, down_read(¤t->mm->mmap_sem); rc = get_user_pages(current, current->mm, (unsigned long)data, - num_pages, 0, 0, pages, NULL); + num_pages, write_page, 0, pages, NULL); up_read(¤t->mm->mmap_sem); if (rc < 0) goto fail; @@ -36,12 +36,15 @@ fail: } EXPORT_SYMBOL(ceph_get_direct_page_vector); -void ceph_put_page_vector(struct page **pages, int num_pages) +void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty) { int i; - for (i = 0; i < num_pages; i++) + for (i = 0; i < num_pages; i++) { + if (dirty) + set_page_dirty_lock(pages[i]); put_page(pages[i]); + } kfree(pages); } EXPORT_SYMBOL(ceph_put_page_vector); -- cgit v1.2.3-70-g09d2 From 361cf40519a491f68b28ad90225e4611c4bf8e12 Mon Sep 17 00:00:00 2001 From: Henry C Chang Date: Fri, 17 Dec 2010 09:55:59 -0800 Subject: ceph: handle partial result from get_user_pages The get_user_pages() helper can return fewer than the requested pages. Error out in that case, and clean up the partial result. Signed-off-by: Henry C Chang Signed-off-by: Sage Weil --- net/ceph/pagevec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 01947a5d03b7..1a040e64c69f 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -26,12 +26,12 @@ struct page **ceph_get_direct_page_vector(const char __user *data, rc = get_user_pages(current, current->mm, (unsigned long)data, num_pages, write_page, 0, pages, NULL); up_read(¤t->mm->mmap_sem); - if (rc < 0) + if (rc < num_pages) goto fail; return pages; fail: - kfree(pages); + ceph_put_page_vector(pages, rc > 0 ? rc : 0, false); return ERR_PTR(rc); } EXPORT_SYMBOL(ceph_get_direct_page_vector); -- cgit v1.2.3-70-g09d2 From ad0081e43af6de3fecf308b0d098f9611835766b Mon Sep 17 00:00:00 2001 From: David Stevens Date: Fri, 17 Dec 2010 11:42:42 +0000 Subject: ipv6: Fragment locally generated tunnel-mode IPSec6 packets as needed. This patch modifies IPsec6 to fragment IPv6 packets that are locally generated as needed. This version of the patch only fragments in tunnel mode, so that fragment headers will not be obscured by ESP in transport mode. Signed-off-by: David L Stevens Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/ip6_route.h | 10 ++++++++++ net/ipv6/ip6_output.c | 12 ++---------- net/ipv6/xfrm6_output.c | 16 +++++++++++++++- 3 files changed, 27 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 278312c95f96..2ab926860cd8 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -164,5 +164,15 @@ static inline int ipv6_unicast_destination(struct sk_buff *skb) return rt->rt6i_flags & RTF_LOCAL; } +int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); + +static inline int ip6_skb_dst_mtu(struct sk_buff *skb) +{ + struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; + + return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? + skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); +} + #endif #endif diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 99157b4cd56e..94b5bf132b2e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,7 +56,7 @@ #include #include -static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); +int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); int __ip6_local_out(struct sk_buff *skb) { @@ -145,14 +145,6 @@ static int ip6_finish_output2(struct sk_buff *skb) return -EINVAL; } -static inline int ip6_skb_dst_mtu(struct sk_buff *skb) -{ - struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; - - return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? - skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); -} - static int ip6_finish_output(struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || @@ -601,7 +593,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) return offset; } -static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6434bd5ce088..8e688b3de9ab 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -17,6 +17,7 @@ #include #include #include +#include #include int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, @@ -88,8 +89,21 @@ static int xfrm6_output_finish(struct sk_buff *skb) return xfrm_output(skb); } +static int __xfrm6_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + + if ((x && x->props.mode == XFRM_MODE_TUNNEL) && + ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || + dst_allfrag(skb_dst(skb)))) { + return ip6_fragment(skb, xfrm6_output_finish); + } + return xfrm6_output_finish(skb); +} + int xfrm6_output(struct sk_buff *skb) { return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, - skb_dst(skb)->dev, xfrm6_output_finish); + skb_dst(skb)->dev, __xfrm6_output); } -- cgit v1.2.3-70-g09d2 From aa3e219997e4b949be4199660936099ded0b401f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Dec 2010 13:18:16 -0800 Subject: net_sched: sch_sfq: fix allot handling When deploying SFQ/IFB here at work, I found the allot management was pretty wrong in sfq, even changing allot from short to int... We should init allot for each new flow, not using a previous value found in slot. Before patch, I saw bursts of several packets per flow, apparently denying the default "quantum 1514" limit I had on my SFQ class. class sfq 11:1 parent 11: (dropped 0, overlimits 0 requeues 0) backlog 0b 7p requeues 0 allot 11546 class sfq 11:46 parent 11: (dropped 0, overlimits 0 requeues 0) backlog 0b 1p requeues 0 allot -23873 class sfq 11:78 parent 11: (dropped 0, overlimits 0 requeues 0) backlog 0b 5p requeues 0 allot 11393 After patch, better fairness among each flow, allot limit being respected, allot is positive : class sfq 11:e parent 11: (dropped 0, overlimits 0 requeues 86) backlog 0b 3p requeues 86 allot 596 class sfq 11:94 parent 11: (dropped 0, overlimits 0 requeues 0) backlog 0b 3p requeues 0 allot 1468 class sfq 11:a4 parent 11: (dropped 0, overlimits 0 requeues 0) backlog 0b 4p requeues 0 allot 650 class sfq 11:bb parent 11: (dropped 0, overlimits 0 requeues 0) backlog 0b 3p requeues 0 allot 596 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 3cf478d012dd..7150705f1d0b 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -270,7 +270,6 @@ static unsigned int sfq_drop(struct Qdisc *sch) /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ d = q->next[q->tail]; q->next[q->tail] = q->next[d]; - q->allot[q->next[d]] += q->quantum; skb = q->qs[d].prev; len = qdisc_pkt_len(skb); __skb_unlink(skb, &q->qs[d]); @@ -321,14 +320,13 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) sfq_inc(q, x); if (q->qs[x].qlen == 1) { /* The flow is new */ if (q->tail == SFQ_DEPTH) { /* It is the first flow */ - q->tail = x; q->next[x] = x; - q->allot[x] = q->quantum; } else { q->next[x] = q->next[q->tail]; q->next[q->tail] = x; - q->tail = x; } + q->tail = x; + q->allot[x] = q->quantum; } if (++sch->q.qlen <= q->limit) { sch->bstats.bytes += qdisc_pkt_len(skb); @@ -359,13 +357,13 @@ sfq_dequeue(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - sfq_index a, old_a; + sfq_index a, next_a; /* No active slots */ if (q->tail == SFQ_DEPTH) return NULL; - a = old_a = q->next[q->tail]; + a = q->next[q->tail]; /* Grab packet */ skb = __skb_dequeue(&q->qs[a]); @@ -376,17 +374,15 @@ sfq_dequeue(struct Qdisc *sch) /* Is the slot empty? */ if (q->qs[a].qlen == 0) { q->ht[q->hash[a]] = SFQ_DEPTH; - a = q->next[a]; - if (a == old_a) { + next_a = q->next[a]; + if (a == next_a) { q->tail = SFQ_DEPTH; return skb; } - q->next[q->tail] = a; - q->allot[a] += q->quantum; + q->next[q->tail] = next_a; } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) { - q->tail = a; - a = q->next[a]; q->allot[a] += q->quantum; + q->tail = a; } return skb; } -- cgit v1.2.3-70-g09d2 From b51aff057c9d0ef6c529dc25fd9f775faf7b6c63 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 22 Dec 2010 10:15:07 +0100 Subject: mac80211: fix mesh forwarding Under memory pressure, the mac80211 mesh code may helpfully print a message that it failed to clone a mesh frame and then will proceed to crash trying to use it anyway. Fix that. Cc: stable@kernel.org [2.6.27+] Signed-off-by: Johannes Berg Acked-by: Javier Cardona Signed-off-by: John W. Linville --- net/mac80211/rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 54fb4a0e76f0..b01e467b76c6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1788,9 +1788,11 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) fwd_skb = skb_copy(skb, GFP_ATOMIC); - if (!fwd_skb && net_ratelimit()) + if (!fwd_skb && net_ratelimit()) { printk(KERN_DEBUG "%s: failed to clone mesh frame\n", sdata->name); + goto out; + } fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN); @@ -1828,6 +1830,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) } } + out: if (is_multicast_ether_addr(hdr->addr1) || sdata->dev->flags & IFF_PROMISC) return RX_CONTINUE; -- cgit v1.2.3-70-g09d2 From 1bde5ac49398a064c753bb490535cfad89e99a5f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Dec 2010 09:32:46 -0800 Subject: tcp: fix listening_get_next() Alexey Vlasov found /proc/net/tcp could sometime loop and display millions of sockets in LISTEN state. In 2.6.29, when we converted TCP hash tables to RCU, we left two sk_next() calls in listening_get_next(). We must instead use sk_nulls_next() to properly detect an end of chain. Reported-by: Alexey Vlasov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e13da6de1fc7..d978bb2f748b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2030,7 +2030,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) get_req: req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; } - sk = sk_next(st->syn_wait_sk); + sk = sk_nulls_next(st->syn_wait_sk); st->state = TCP_SEQ_STATE_LISTENING; read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } else { @@ -2039,7 +2039,7 @@ get_req: if (reqsk_queue_len(&icsk->icsk_accept_queue)) goto start_req; read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - sk = sk_next(sk); + sk = sk_nulls_next(sk); } get_sk: sk_nulls_for_each_from(sk, node) { -- cgit v1.2.3-70-g09d2 From fdac1e0697356ac212259f2147aa60c72e334861 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Wed, 22 Dec 2010 13:58:27 +0000 Subject: irda: prevent integer underflow in IRLMP_ENUMDEVICES If the user-provided len is less than the expected offset, the IRLMP_ENUMDEVICES getsockopt will do a copy_to_user() with a very large size value. While this isn't be a security issue on x86 because it will get caught by the access_ok() check, it may leak large amounts of kernel heap on other architectures. In any event, this patch fixes it. Signed-off-by: Dan Rosenberg Signed-off-by: David S. Miller --- net/irda/af_irda.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 7f097989cde2..5007541b6478 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2281,6 +2281,16 @@ static int irda_getsockopt(struct socket *sock, int level, int optname, switch (optname) { case IRLMP_ENUMDEVICES: + + /* Offset to first device entry */ + offset = sizeof(struct irda_device_list) - + sizeof(struct irda_device_info); + + if (len < offset) { + err = -EINVAL; + goto out; + } + /* Ask lmp for the current discovery log */ discoveries = irlmp_get_discoveries(&list.len, self->mask.word, self->nslots); @@ -2291,15 +2301,9 @@ static int irda_getsockopt(struct socket *sock, int level, int optname, } /* Write total list length back to client */ - if (copy_to_user(optval, &list, - sizeof(struct irda_device_list) - - sizeof(struct irda_device_info))) + if (copy_to_user(optval, &list, offset)) err = -EFAULT; - /* Offset to first device entry */ - offset = sizeof(struct irda_device_list) - - sizeof(struct irda_device_info); - /* Copy the list itself - watch for overflow */ if (list.len > 2048) { err = -EINVAL; -- cgit v1.2.3-70-g09d2 From e058464990c2ef1f3ecd6b83a154913c3c06f02a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 23 Dec 2010 12:03:57 -0800 Subject: Revert "ipv4: Allow configuring subnets as local addresses" This reverts commit 4465b469008bc03b98a1b8df4e9ae501b6c69d4b. Conflicts: net/ipv4/fib_frontend.c As reported by Ben Greear, this causes regressions: > Change 4465b469008bc03b98a1b8df4e9ae501b6c69d4b caused rules > to stop matching the input device properly because the > FLOWI_FLAG_MATCH_ANY_IIF is always defined in ip_dev_find(). > > This breaks rules such as: > > ip rule add pref 512 lookup local > ip rule del pref 0 lookup local > ip link set eth2 up > ip -4 addr add 172.16.0.102/24 broadcast 172.16.0.255 dev eth2 > ip rule add to 172.16.0.102 iif eth2 lookup local pref 10 > ip rule add iif eth2 lookup 10001 pref 20 > ip route add 172.16.0.0/24 dev eth2 table 10001 > ip route add unreachable 0/0 table 10001 > > If you had a second interface 'eth0' that was on a different > subnet, pinging a system on that interface would fail: > > [root@ct503-60 ~]# ping 192.168.100.1 > connect: Invalid argument Reported-by: Ben Greear Signed-off-by: David S. Miller --- include/net/flow.h | 1 - net/core/fib_rules.c | 3 +-- net/ipv4/fib_frontend.c | 10 ++++++++-- 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/flow.h b/include/net/flow.h index 0ac3fb5e0973..bb08692a20b0 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -49,7 +49,6 @@ struct flowi { __u8 proto; __u8 flags; #define FLOWI_FLAG_ANYSRC 0x01 -#define FLOWI_FLAG_MATCH_ANY_IIF 0x02 union { struct { __be16 sport; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 82a4369ae150..a20e5d3bbfa0 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -181,8 +181,7 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, { int ret = 0; - if (rule->iifindex && (rule->iifindex != fl->iif) && - !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF)) + if (rule->iifindex && (rule->iifindex != fl->iif)) goto out; if (rule->oifindex && (rule->oifindex != fl->oif)) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eb6f69a8f27a..c19c1f739fba 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -163,13 +163,19 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) .daddr = addr } }, - .flags = FLOWI_FLAG_MATCH_ANY_IIF }; struct fib_result res = { 0 }; struct net_device *dev = NULL; + struct fib_table *local_table; + +#ifdef CONFIG_IP_MULTIPLE_TABLES + res.r = NULL; +#endif rcu_read_lock(); - if (fib_lookup(net, &fl, &res)) { + local_table = fib_get_table(net, RT_TABLE_LOCAL); + if (!local_table || + fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { rcu_read_unlock(); return NULL; } -- cgit v1.2.3-70-g09d2 From fc75fc8339e7727167443469027540b283daac71 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Dec 2010 04:39:39 +0000 Subject: ipv4: dont create routes on down devices In ip_route_output_slow(), instead of allowing a route to be created on a not UPed device, report -ENETUNREACH immediately. # ip tunnel add mode ipip remote 10.16.0.164 local 10.16.0.72 dev eth0 # (Note : tunl1 is down) # ping -I tunl1 10.1.2.3 PING 10.1.2.3 (10.1.2.3) from 192.168.18.5 tunl1: 56(84) bytes of data. (nothing) # ./a.out tunl1 # ip tunnel del tunl1 Message from syslogd@shelby at Dec 22 10:12:08 ... kernel: unregister_netdevice: waiting for tunl1 to become free. Usage count = 3 After patch: # ping -I tunl1 10.1.2.3 connect: Network is unreachable Reported-by: Nicolas Dichtel Signed-off-by: Eric Dumazet Reviewed-by: Octavian Purdila Signed-off-by: David S. Miller --- net/ipv4/route.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 987bf9adb318..df948b0f1ac9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2585,9 +2585,10 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, goto out; /* RACE: Check return value of inet_select_addr instead. */ - if (rcu_dereference(dev_out->ip_ptr) == NULL) - goto out; /* Wrong error code */ - + if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { + err = -ENETUNREACH; + goto out; + } if (ipv4_is_local_multicast(oldflp->fl4_dst) || ipv4_is_lbcast(oldflp->fl4_dst)) { if (!fl.fl4_src) -- cgit v1.2.3-70-g09d2 From 9f260e0efa4766e56d0ac14f1aeea6ee5eb8fe83 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Sun, 26 Dec 2010 06:54:53 +0000 Subject: CAN: Use inode instead of kernel address for /proc file Since the socket address is just being used as a unique identifier, its inode number is an alternative that does not leak potentially sensitive information. CC-ing stable because MITRE has assigned CVE-2010-4565 to the issue. Signed-off-by: Dan Rosenberg Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/bcm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/can/bcm.c b/net/can/bcm.c index 6faa8256e10c..9d5e8accfab1 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -125,7 +125,7 @@ struct bcm_sock { struct list_head tx_ops; unsigned long dropped_usr_msgs; struct proc_dir_entry *bcm_proc_read; - char procname [20]; /* pointer printed in ASCII with \0 */ + char procname [32]; /* inode number in decimal with \0 */ }; static inline struct bcm_sock *bcm_sk(const struct sock *sk) @@ -1521,7 +1521,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, if (proc_dir) { /* unique socket address as filename */ - sprintf(bo->procname, "%p", sock); + sprintf(bo->procname, "%lu", sock_i_ino(sk)); bo->bcm_proc_read = proc_create_data(bo->procname, 0644, proc_dir, &bcm_proc_fops, sk); -- cgit v1.2.3-70-g09d2 From 9d89081d698132b5f964aea88112f76492563ee9 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Mon, 3 Jan 2011 11:26:08 -0800 Subject: bridge: fix br_multicast_ipv6_rcv for paged skbs use pskb_may_pull to access ipv6 header correctly for paged skbs It was omitted in the bridge code leading to crash in blind __skb_pull since the skb is cloned undonditionally we also simplify the the exit path this fixes bug https://bugzilla.kernel.org/show_bug.cgi?id=25202 Dec 15 14:36:40 User-PC hostapd: wlan0: STA 00:15:00:60:5d:34 IEEE 802.11: authenticated Dec 15 14:36:40 User-PC hostapd: wlan0: STA 00:15:00:60:5d:34 IEEE 802.11: associated (aid 2) Dec 15 14:36:40 User-PC hostapd: wlan0: STA 00:15:00:60:5d:34 RADIUS: starting accounting session 4D0608A3-00000005 Dec 15 14:36:41 User-PC kernel: [175576.120287] ------------[ cut here ]------------ Dec 15 14:36:41 User-PC kernel: [175576.120452] kernel BUG at include/linux/skbuff.h:1178! Dec 15 14:36:41 User-PC kernel: [175576.120609] invalid opcode: 0000 [#1] SMP Dec 15 14:36:41 User-PC kernel: [175576.120749] last sysfs file: /sys/devices/pci0000:00/0000:00:1f.2/host0/target0:0:0/0:0:0:0/block/sda/uevent Dec 15 14:36:41 User-PC kernel: [175576.121035] Modules linked in: approvals binfmt_misc bridge stp llc parport_pc ppdev arc4 iwlagn snd_hda_codec_realtek iwlcore i915 snd_hda_intel mac80211 joydev snd_hda_codec snd_hwdep snd_pcm snd_seq_midi drm_kms_helper snd_rawmidi drm snd_seq_midi_event snd_seq snd_timer snd_seq_device cfg80211 eeepc_wmi usbhid psmouse intel_agp i2c_algo_bit intel_gtt uvcvideo agpgart videodev sparse_keymap snd shpchp v4l1_compat lp hid video serio_raw soundcore output snd_page_alloc ahci libahci atl1c Dec 15 14:36:41 User-PC kernel: [175576.122712] Dec 15 14:36:41 User-PC kernel: [175576.122769] Pid: 0, comm: kworker/0:0 Tainted: G W 2.6.37-rc5-wl+ #3 1015PE/1016P Dec 15 14:36:41 User-PC kernel: [175576.123012] EIP: 0060:[] EFLAGS: 00010283 CPU: 1 Dec 15 14:36:41 User-PC kernel: [175576.123193] EIP is at br_multicast_rcv+0xc95/0xe1c [bridge] Dec 15 14:36:41 User-PC kernel: [175576.123362] EAX: 0000001c EBX: f5626318 ECX: 00000000 EDX: 00000000 Dec 15 14:36:41 User-PC kernel: [175576.123550] ESI: ec512262 EDI: f5626180 EBP: f60b5ca0 ESP: f60b5bd8 Dec 15 14:36:41 User-PC kernel: [175576.123737] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Dec 15 14:36:41 User-PC kernel: [175576.123902] Process kworker/0:0 (pid: 0, ti=f60b4000 task=f60a8000 task.ti=f60b0000) Dec 15 14:36:41 User-PC kernel: [175576.124137] Stack: Dec 15 14:36:41 User-PC kernel: [175576.124181] ec556500 f6d06800 f60b5be8 c01087d8 ec512262 00000030 00000024 f5626180 Dec 15 14:36:41 User-PC kernel: [175576.124181] f572c200 ef463440 f5626300 3affffff f6d06dd0 e60766a4 000000c4 f6d06860 Dec 15 14:36:41 User-PC kernel: [175576.124181] ffffffff ec55652c 00000001 f6d06844 f60b5c64 c0138264 c016e451 c013e47d Dec 15 14:36:41 User-PC kernel: [175576.124181] Call Trace: Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? sched_clock+0x8/0x10 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? enqueue_entity+0x174/0x440 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? sched_clock_cpu+0x131/0x190 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? select_task_rq_fair+0x2ad/0x730 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? nf_iterate+0x71/0x90 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? br_handle_frame_finish+0x184/0x220 [bridge] Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? br_handle_frame_finish+0x0/0x220 [bridge] Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? br_handle_frame+0x189/0x230 [bridge] Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? br_handle_frame_finish+0x0/0x220 [bridge] Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? br_handle_frame+0x0/0x230 [bridge] Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? __netif_receive_skb+0x1b6/0x5b0 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? skb_copy_bits+0x110/0x210 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? netif_receive_skb+0x6f/0x80 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? ieee80211_deliver_skb+0x8c/0x1a0 [mac80211] Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? ieee80211_rx_handlers+0xeb6/0x1aa0 [mac80211] Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? __netif_receive_skb+0x380/0x5b0 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? sched_clock_local+0xb2/0x190 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? default_spin_lock_flags+0x8/0x10 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? _raw_spin_lock_irqsave+0x2f/0x50 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? ieee80211_prepare_and_rx_handle+0x201/0xa90 [mac80211] Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? ieee80211_rx+0x2a4/0x830 [mac80211] Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? iwl_update_stats+0xa6/0x2a0 [iwlcore] Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? iwlagn_rx_reply_rx+0x292/0x3b0 [iwlagn] Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? _raw_spin_lock_irqsave+0x2f/0x50 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? iwl_rx_handle+0xe7/0x350 [iwlagn] Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? iwl_irq_tasklet+0xf7/0x5c0 [iwlagn] Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? __rcu_process_callbacks+0x201/0x2d0 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? tasklet_action+0xc5/0x100 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? __do_softirq+0x97/0x1d0 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? nmi_stack_correct+0x2f/0x34 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? __do_softirq+0x0/0x1d0 Dec 15 14:36:41 User-PC kernel: [175576.124181] Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? irq_exit+0x65/0x70 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? do_IRQ+0x52/0xc0 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? common_interrupt+0x30/0x38 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? intel_idle+0xc2/0x160 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? cpuidle_idle_call+0x6b/0x100 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? cpu_idle+0x8a/0xf0 Dec 15 14:36:41 User-PC kernel: [175576.124181] [] ? start_secondary+0x1e8/0x1ee Cc: David Miller Cc: Johannes Berg Cc: Stephen Hemminger Signed-off-by: Tomas Winkler Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index f19e347f56f6..543b3262d002 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1430,7 +1430,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { - struct sk_buff *skb2 = skb; + struct sk_buff *skb2; struct ipv6hdr *ip6h; struct icmp6hdr *icmp6h; u8 nexthdr; @@ -1469,15 +1469,15 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, if (!skb2) return -ENOMEM; + err = -EINVAL; + if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr))) + goto out; + len -= offset - skb_network_offset(skb2); __skb_pull(skb2, offset); skb_reset_transport_header(skb2); - err = -EINVAL; - if (!pskb_may_pull(skb2, sizeof(*icmp6h))) - goto out; - icmp6h = icmp6_hdr(skb2); switch (icmp6h->icmp6_type) { @@ -1516,7 +1516,12 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, switch (icmp6h->icmp6_type) { case ICMPV6_MGM_REPORT: { - struct mld_msg *mld = (struct mld_msg *)icmp6h; + struct mld_msg *mld; + if (!pskb_may_pull(skb2, sizeof(*mld))) { + err = -EINVAL; + goto out; + } + mld = (struct mld_msg *)skb_transport_header(skb2); BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); break; @@ -1529,15 +1534,18 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, break; case ICMPV6_MGM_REDUCTION: { - struct mld_msg *mld = (struct mld_msg *)icmp6h; + struct mld_msg *mld; + if (!pskb_may_pull(skb2, sizeof(*mld))) { + err = -EINVAL; + goto out; + } + mld = (struct mld_msg *)skb_transport_header(skb2); br_ip6_multicast_leave_group(br, port, &mld->mld_mca); } } out: - __skb_push(skb2, offset); - if (skb2 != skb) - kfree_skb(skb2); + kfree_skb(skb2); return err; } #endif -- cgit v1.2.3-70-g09d2 From e6f26129ebbb0071016e2526036f42036ccf30e1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 3 Jan 2011 04:16:28 +0000 Subject: bridge: stp: ensure mac header is set commit bf9ae5386bca8836c16e69ab8fdbe46767d7452a (llc: use dev_hard_header) removed the skb_reset_mac_header call from llc_mac_hdr_init. This seems fine itself, but br_send_bpdu() invokes ebtables LOCAL_OUT. We oops in ebt_basic_match() because it assumes eth_hdr(skb) returns a meaningful result. Cc: acme@ghostprotocols.net References: https://bugzilla.kernel.org/show_bug.cgi?id=24532 Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/bridge/br_stp_bpdu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 35cf27087b56..e3d7aefa9181 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -50,6 +50,8 @@ static void br_send_bpdu(struct net_bridge_port *p, llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr); + skb_reset_mac_header(skb); + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, dev_queue_xmit); } -- cgit v1.2.3-70-g09d2 From 9fc3bbb4a752f108cf096d96640f3b548bbbce6c Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Mon, 3 Jan 2011 20:24:20 +0000 Subject: ipv4/route.c: respect prefsrc for local routes The preferred source address is currently ignored for local routes, which results in all local connections having a src address that is the same as the local dst address. Fix this by respecting the preferred source address when it is provided for local routes. This bug can be demonstrated as follows: # ifconfig dummy0 192.168.0.1 # ip route show table local | grep local.*dummy0 local 192.168.0.1 dev dummy0 proto kernel scope host src 192.168.0.1 # ip route change table local local 192.168.0.1 dev dummy0 \ proto kernel scope host src 127.0.0.1 # ip route show table local | grep local.*dummy0 local 192.168.0.1 dev dummy0 proto kernel scope host src 127.0.0.1 We now establish a local connection and verify the source IP address selection: # nc -l 192.168.0.1 3128 & # nc 192.168.0.1 3128 & # netstat -ant | grep 192.168.0.1:3128.*EST tcp 0 0 192.168.0.1:3128 192.168.0.1:33228 ESTABLISHED tcp 0 0 192.168.0.1:33228 192.168.0.1:3128 ESTABLISHED Signed-off-by: Joel Sing Signed-off-by: David S. Miller --- net/ipv4/route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index df948b0f1ac9..93bfd95584f4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2649,8 +2649,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, } if (res.type == RTN_LOCAL) { - if (!fl.fl4_src) - fl.fl4_src = fl.fl4_dst; + if (!fl.fl4_src) { + if (res.fi->fib_prefsrc) + fl.fl4_src = res.fi->fib_prefsrc; + else + fl.fl4_src = fl.fl4_dst; + } dev_out = net->loopback_dev; fl.oif = dev_out->ifindex; res.fi = NULL; -- cgit v1.2.3-70-g09d2 From 6650239a4b01077e80d5a4468562756d77afaa59 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 8 Jan 2011 17:45:38 -0500 Subject: NFS: Don't use vm_map_ram() in readdir vm_map_ram() is not available on NOMMU platforms, and causes trouble on incoherrent architectures such as ARM when we access the page data through both the direct and the virtual mapping. The alternative is to use the direct mapping to access page data for the case when we are not crossing a page boundary, but to copy the data into a linear scratch buffer when we are accessing data that spans page boundaries. Signed-off-by: Trond Myklebust Tested-by: Marc Kleine-Budde Cc: stable@kernel.org [2.6.37] --- fs/nfs/dir.c | 44 ++++++------- fs/nfs/nfs2xdr.c | 6 -- fs/nfs/nfs3xdr.c | 6 -- fs/nfs/nfs4xdr.c | 6 -- include/linux/sunrpc/xdr.h | 4 +- net/sunrpc/xdr.c | 155 ++++++++++++++++++++++++++++++++++++--------- 6 files changed, 148 insertions(+), 73 deletions(-) (limited to 'net') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 996dd8989a91..0108cf4f3403 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include "delegation.h" @@ -459,25 +458,26 @@ out: /* Perform conversion from xdr to cache array */ static int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, - void *xdr_page, struct page *page, unsigned int buflen) + struct page **xdr_pages, struct page *page, unsigned int buflen) { struct xdr_stream stream; - struct xdr_buf buf; - __be32 *ptr = xdr_page; + struct xdr_buf buf = { + .pages = xdr_pages, + .page_len = buflen, + .buflen = buflen, + .len = buflen, + }; + struct page *scratch; struct nfs_cache_array *array; unsigned int count = 0; int status; - buf.head->iov_base = xdr_page; - buf.head->iov_len = buflen; - buf.tail->iov_len = 0; - buf.page_base = 0; - buf.page_len = 0; - buf.buflen = buf.head->iov_len; - buf.len = buf.head->iov_len; - - xdr_init_decode(&stream, &buf, ptr); + scratch = alloc_page(GFP_KERNEL); + if (scratch == NULL) + return -ENOMEM; + xdr_init_decode(&stream, &buf, NULL); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); do { status = xdr_decode(desc, entry, &stream); @@ -506,6 +506,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en } else status = PTR_ERR(array); } + + put_page(scratch); return status; } @@ -521,7 +523,6 @@ static void nfs_readdir_free_large_page(void *ptr, struct page **pages, unsigned int npages) { - vm_unmap_ram(ptr, npages); nfs_readdir_free_pagearray(pages, npages); } @@ -530,9 +531,8 @@ void nfs_readdir_free_large_page(void *ptr, struct page **pages, * to nfs_readdir_free_large_page */ static -void *nfs_readdir_large_page(struct page **pages, unsigned int npages) +int nfs_readdir_large_page(struct page **pages, unsigned int npages) { - void *ptr; unsigned int i; for (i = 0; i < npages; i++) { @@ -541,13 +541,11 @@ void *nfs_readdir_large_page(struct page **pages, unsigned int npages) goto out_freepages; pages[i] = page; } + return 0; - ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL); - if (!IS_ERR_OR_NULL(ptr)) - return ptr; out_freepages: nfs_readdir_free_pagearray(pages, i); - return NULL; + return -ENOMEM; } static @@ -577,8 +575,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, memset(array, 0, sizeof(struct nfs_cache_array)); array->eof_index = -1; - pages_ptr = nfs_readdir_large_page(pages, array_size); - if (!pages_ptr) + status = nfs_readdir_large_page(pages, array_size); + if (status < 0) goto out_release_array; do { unsigned int pglen; @@ -587,7 +585,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, if (status < 0) break; pglen = status; - status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen); + status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen); if (status < 0) { if (status == -ENOSPC) status = 0; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 5914a1911c95..b382a1b5e7e4 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -487,12 +487,6 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se entry->d_type = DT_UNKNOWN; - p = xdr_inline_peek(xdr, 8); - if (p != NULL) - entry->eof = !p[0] && p[1]; - else - entry->eof = 0; - return p; out_overflow: diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index f6cc60f06dac..ba91236c6ee7 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -647,12 +647,6 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); } - p = xdr_inline_peek(xdr, 8); - if (p != NULL) - entry->eof = !p[0] && p[1]; - else - entry->eof = 0; - return p; out_overflow: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 9f1826b012e6..0662a9821df5 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6215,12 +6215,6 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (verify_attr_len(xdr, p, len) < 0) goto out_overflow; - p = xdr_inline_peek(xdr, 8); - if (p != NULL) - entry->eof = !p[0] && p[1]; - else - entry->eof = 0; - return p; out_overflow: diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 498ab93a81e4..7783c687c777 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -201,6 +201,8 @@ struct xdr_stream { __be32 *end; /* end of available buffer space */ struct kvec *iov; /* pointer to the current kvec */ + struct kvec scratch; /* Scratch buffer */ + struct page **page_ptr; /* pointer to the current page */ }; extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); @@ -208,7 +210,7 @@ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); -extern __be32 *xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes); +extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index cd9e841e7492..679cd674b81d 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -552,6 +552,74 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b } EXPORT_SYMBOL_GPL(xdr_write_pages); +static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov, + __be32 *p, unsigned int len) +{ + if (len > iov->iov_len) + len = iov->iov_len; + if (p == NULL) + p = (__be32*)iov->iov_base; + xdr->p = p; + xdr->end = (__be32*)(iov->iov_base + len); + xdr->iov = iov; + xdr->page_ptr = NULL; +} + +static int xdr_set_page_base(struct xdr_stream *xdr, + unsigned int base, unsigned int len) +{ + unsigned int pgnr; + unsigned int maxlen; + unsigned int pgoff; + unsigned int pgend; + void *kaddr; + + maxlen = xdr->buf->page_len; + if (base >= maxlen) + return -EINVAL; + maxlen -= base; + if (len > maxlen) + len = maxlen; + + base += xdr->buf->page_base; + + pgnr = base >> PAGE_SHIFT; + xdr->page_ptr = &xdr->buf->pages[pgnr]; + kaddr = page_address(*xdr->page_ptr); + + pgoff = base & ~PAGE_MASK; + xdr->p = (__be32*)(kaddr + pgoff); + + pgend = pgoff + len; + if (pgend > PAGE_SIZE) + pgend = PAGE_SIZE; + xdr->end = (__be32*)(kaddr + pgend); + xdr->iov = NULL; + return 0; +} + +static void xdr_set_next_page(struct xdr_stream *xdr) +{ + unsigned int newbase; + + newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT; + newbase -= xdr->buf->page_base; + + if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0) + xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len); +} + +static bool xdr_set_next_buffer(struct xdr_stream *xdr) +{ + if (xdr->page_ptr != NULL) + xdr_set_next_page(xdr); + else if (xdr->iov == xdr->buf->head) { + if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0) + xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len); + } + return xdr->p != xdr->end; +} + /** * xdr_init_decode - Initialize an xdr_stream for decoding data. * @xdr: pointer to xdr_stream struct @@ -560,41 +628,67 @@ EXPORT_SYMBOL_GPL(xdr_write_pages); */ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) { - struct kvec *iov = buf->head; - unsigned int len = iov->iov_len; - - if (len > buf->len) - len = buf->len; xdr->buf = buf; - xdr->iov = iov; - xdr->p = p; - xdr->end = (__be32 *)((char *)iov->iov_base + len); + xdr->scratch.iov_base = NULL; + xdr->scratch.iov_len = 0; + if (buf->head[0].iov_len != 0) + xdr_set_iov(xdr, buf->head, p, buf->len); + else if (buf->page_len != 0) + xdr_set_page_base(xdr, 0, buf->len); } EXPORT_SYMBOL_GPL(xdr_init_decode); -/** - * xdr_inline_peek - Allow read-ahead in the XDR data stream - * @xdr: pointer to xdr_stream struct - * @nbytes: number of bytes of data to decode - * - * Check if the input buffer is long enough to enable us to decode - * 'nbytes' more bytes of data starting at the current position. - * If so return the current pointer without updating the current - * pointer position. - */ -__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes) +static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { __be32 *p = xdr->p; __be32 *q = p + XDR_QUADLEN(nbytes); if (unlikely(q > xdr->end || q < p)) return NULL; + xdr->p = q; return p; } -EXPORT_SYMBOL_GPL(xdr_inline_peek); /** - * xdr_inline_decode - Retrieve non-page XDR data to decode + * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. + * @xdr: pointer to xdr_stream struct + * @buf: pointer to an empty buffer + * @buflen: size of 'buf' + * + * The scratch buffer is used when decoding from an array of pages. + * If an xdr_inline_decode() call spans across page boundaries, then + * we copy the data into the scratch buffer in order to allow linear + * access. + */ +void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) +{ + xdr->scratch.iov_base = buf; + xdr->scratch.iov_len = buflen; +} +EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer); + +static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes) +{ + __be32 *p; + void *cpdest = xdr->scratch.iov_base; + size_t cplen = (char *)xdr->end - (char *)xdr->p; + + if (nbytes > xdr->scratch.iov_len) + return NULL; + memcpy(cpdest, xdr->p, cplen); + cpdest += cplen; + nbytes -= cplen; + if (!xdr_set_next_buffer(xdr)) + return NULL; + p = __xdr_inline_decode(xdr, nbytes); + if (p == NULL) + return NULL; + memcpy(cpdest, p, nbytes); + return xdr->scratch.iov_base; +} + +/** + * xdr_inline_decode - Retrieve XDR data to decode * @xdr: pointer to xdr_stream struct * @nbytes: number of bytes of data to decode * @@ -605,13 +699,16 @@ EXPORT_SYMBOL_GPL(xdr_inline_peek); */ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { - __be32 *p = xdr->p; - __be32 *q = p + XDR_QUADLEN(nbytes); + __be32 *p; - if (unlikely(q > xdr->end || q < p)) + if (nbytes == 0) + return xdr->p; + if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr)) return NULL; - xdr->p = q; - return p; + p = __xdr_inline_decode(xdr, nbytes); + if (p != NULL) + return p; + return xdr_copy_to_scratch(xdr, nbytes); } EXPORT_SYMBOL_GPL(xdr_inline_decode); @@ -671,16 +768,12 @@ EXPORT_SYMBOL_GPL(xdr_read_pages); */ void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) { - char * kaddr = page_address(xdr->buf->pages[0]); xdr_read_pages(xdr, len); /* * Position current pointer at beginning of tail, and * set remaining message length. */ - if (len > PAGE_CACHE_SIZE - xdr->buf->page_base) - len = PAGE_CACHE_SIZE - xdr->buf->page_base; - xdr->p = (__be32 *)(kaddr + xdr->buf->page_base); - xdr->end = (__be32 *)((char *)xdr->p + len); + xdr_set_page_base(xdr, 0, len); } EXPORT_SYMBOL_GPL(xdr_enter_page); -- cgit v1.2.3-70-g09d2