From 879aa24d6394aa04b690a600a41ff500441ad384 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 26 Nov 2010 06:47:28 -0300 Subject: [media] V4L: improve the BKL replacement heuristic The BKL replacement mutex had some serious performance side-effects on V4L drivers. It is replaced by a better heuristic that works around the worst of the side-effects. Read the v4l2-dev.c comments for the whole sorry story. This is a temporary measure only until we can convert all v4l drivers to use unlocked_ioctl. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h index 6648036b728d..b16f307d471a 100644 --- a/include/media/v4l2-device.h +++ b/include/media/v4l2-device.h @@ -51,6 +51,8 @@ struct v4l2_device { unsigned int notification, void *arg); /* The control handler. May be NULL. */ struct v4l2_ctrl_handler *ctrl_handler; + /* BKL replacement mutex. Temporary solution only. */ + struct mutex ioctl_lock; }; /* Initialize v4l2_dev and make dev->driver_data point to v4l2_dev. -- cgit v1.2.3-70-g09d2 From a757ee2216211278680dd8ac869aabe7b4a9970d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 2 Dec 2010 01:57:03 -0200 Subject: [media] Don't export format_by_forcc on two different drivers Drivers should append their name on exported symbols, to avoid conflicts with allyesconfig: drivers/staging/built-in.o: In function `format_by_fourcc': /home/v4l/work_trees/linus/drivers/staging/cx25821/cx25821-video.c:96: multiple definition of `format_by_fourcc' drivers/media/built-in.o:/home/v4l/work_trees/linus/drivers/media/common/saa7146_video.c:88: first defined here Let's rename both occurences with a small shellscript: for i in drivers/staging/cx25821/*.[ch]; do sed s,format_by_fourcc,cx25821_format_by_fourcc,g <$i >a && mv a $i; done for i in drivers/media/common/saa7146*.[ch]; do sed s,format_by_fourcc,saa7146_format_by_fourcc,g <$i >a && mv a $i; done for i in include/media/saa7146*.[ch]; do sed s,format_by_fourcc,saa7146_format_by_fourcc,g <$i >a && mv a $i; done Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/saa7146_hlp.c | 8 ++++---- drivers/media/common/saa7146_video.c | 16 ++++++++-------- drivers/staging/cx25821/cx25821-video.c | 8 ++++---- drivers/staging/cx25821/cx25821-video.h | 2 +- include/media/saa7146.h | 2 +- 5 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/media/common/saa7146_hlp.c b/drivers/media/common/saa7146_hlp.c index 05bde9ccb770..1d1d8d200755 100644 --- a/drivers/media/common/saa7146_hlp.c +++ b/drivers/media/common/saa7146_hlp.c @@ -558,7 +558,7 @@ static void saa7146_set_window(struct saa7146_dev *dev, int width, int height, e static void saa7146_set_position(struct saa7146_dev *dev, int w_x, int w_y, int w_height, enum v4l2_field field, u32 pixelformat) { struct saa7146_vv *vv = dev->vv_data; - struct saa7146_format *sfmt = format_by_fourcc(dev, pixelformat); + struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev, pixelformat); int b_depth = vv->ov_fmt->depth; int b_bpl = vv->ov_fb.fmt.bytesperline; @@ -702,7 +702,7 @@ static int calculate_video_dma_grab_packed(struct saa7146_dev* dev, struct saa71 struct saa7146_vv *vv = dev->vv_data; struct saa7146_video_dma vdma1; - struct saa7146_format *sfmt = format_by_fourcc(dev,buf->fmt->pixelformat); + struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat); int width = buf->fmt->width; int height = buf->fmt->height; @@ -827,7 +827,7 @@ static int calculate_video_dma_grab_planar(struct saa7146_dev* dev, struct saa71 struct saa7146_video_dma vdma2; struct saa7146_video_dma vdma3; - struct saa7146_format *sfmt = format_by_fourcc(dev,buf->fmt->pixelformat); + struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat); int width = buf->fmt->width; int height = buf->fmt->height; @@ -994,7 +994,7 @@ static void program_capture_engine(struct saa7146_dev *dev, int planar) void saa7146_set_capture(struct saa7146_dev *dev, struct saa7146_buf *buf, struct saa7146_buf *next) { - struct saa7146_format *sfmt = format_by_fourcc(dev,buf->fmt->pixelformat); + struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat); struct saa7146_vv *vv = dev->vv_data; u32 vdma1_prot_addr; diff --git a/drivers/media/common/saa7146_video.c b/drivers/media/common/saa7146_video.c index 741c5732b430..d246910129e8 100644 --- a/drivers/media/common/saa7146_video.c +++ b/drivers/media/common/saa7146_video.c @@ -84,7 +84,7 @@ static struct saa7146_format formats[] = { static int NUM_FORMATS = sizeof(formats)/sizeof(struct saa7146_format); -struct saa7146_format* format_by_fourcc(struct saa7146_dev *dev, int fourcc) +struct saa7146_format* saa7146_format_by_fourcc(struct saa7146_dev *dev, int fourcc) { int i, j = NUM_FORMATS; @@ -266,7 +266,7 @@ static int saa7146_pgtable_build(struct saa7146_dev *dev, struct saa7146_buf *bu struct videobuf_dmabuf *dma=videobuf_to_dma(&buf->vb); struct scatterlist *list = dma->sglist; int length = dma->sglen; - struct saa7146_format *sfmt = format_by_fourcc(dev,buf->fmt->pixelformat); + struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat); DEB_EE(("dev:%p, buf:%p, sg_len:%d\n",dev,buf,length)); @@ -408,7 +408,7 @@ static int video_begin(struct saa7146_fh *fh) } } - fmt = format_by_fourcc(dev,fh->video_fmt.pixelformat); + fmt = saa7146_format_by_fourcc(dev,fh->video_fmt.pixelformat); /* we need to have a valid format set here */ BUG_ON(NULL == fmt); @@ -460,7 +460,7 @@ static int video_end(struct saa7146_fh *fh, struct file *file) return -EBUSY; } - fmt = format_by_fourcc(dev,fh->video_fmt.pixelformat); + fmt = saa7146_format_by_fourcc(dev,fh->video_fmt.pixelformat); /* we need to have a valid format set here */ BUG_ON(NULL == fmt); @@ -536,7 +536,7 @@ static int vidioc_s_fbuf(struct file *file, void *fh, struct v4l2_framebuffer *f return -EPERM; /* check args */ - fmt = format_by_fourcc(dev, fb->fmt.pixelformat); + fmt = saa7146_format_by_fourcc(dev, fb->fmt.pixelformat); if (NULL == fmt) return -EINVAL; @@ -760,7 +760,7 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *fh, struct v4l2_forma DEB_EE(("V4L2_BUF_TYPE_VIDEO_CAPTURE: dev:%p, fh:%p\n", dev, fh)); - fmt = format_by_fourcc(dev, f->fmt.pix.pixelformat); + fmt = saa7146_format_by_fourcc(dev, f->fmt.pix.pixelformat); if (NULL == fmt) return -EINVAL; @@ -1264,7 +1264,7 @@ static int buffer_prepare(struct videobuf_queue *q, buf->fmt = &fh->video_fmt; buf->vb.field = fh->video_fmt.field; - sfmt = format_by_fourcc(dev,buf->fmt->pixelformat); + sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat); release_all_pagetables(dev, buf); if( 0 != IS_PLANAR(sfmt->trans)) { @@ -1378,7 +1378,7 @@ static int video_open(struct saa7146_dev *dev, struct file *file) fh->video_fmt.pixelformat = V4L2_PIX_FMT_BGR24; fh->video_fmt.bytesperline = 0; fh->video_fmt.field = V4L2_FIELD_ANY; - sfmt = format_by_fourcc(dev,fh->video_fmt.pixelformat); + sfmt = saa7146_format_by_fourcc(dev,fh->video_fmt.pixelformat); fh->video_fmt.sizeimage = (fh->video_fmt.width * fh->video_fmt.height * sfmt->depth)/8; videobuf_queue_sg_init(&fh->video_q, &video_qops, diff --git a/drivers/staging/cx25821/cx25821-video.c b/drivers/staging/cx25821/cx25821-video.c index e7f1d5778cec..52389308f333 100644 --- a/drivers/staging/cx25821/cx25821-video.c +++ b/drivers/staging/cx25821/cx25821-video.c @@ -92,7 +92,7 @@ int cx25821_get_format_size(void) return ARRAY_SIZE(formats); } -struct cx25821_fmt *format_by_fourcc(unsigned int fourcc) +struct cx25821_fmt *cx25821_format_by_fourcc(unsigned int fourcc) { unsigned int i; @@ -848,7 +848,7 @@ static int video_open(struct file *file) pix_format = (dev->channels[ch_id].pixel_formats == PIXEL_FRMT_411) ? V4L2_PIX_FMT_Y41P : V4L2_PIX_FMT_YUYV; - fh->fmt = format_by_fourcc(pix_format); + fh->fmt = cx25821_format_by_fourcc(pix_format); v4l2_prio_open(&dev->channels[ch_id].prio, &fh->prio); @@ -1010,7 +1010,7 @@ static int vidioc_s_fmt_vid_cap(struct file *file, void *priv, if (0 != err) return err; - fh->fmt = format_by_fourcc(f->fmt.pix.pixelformat); + fh->fmt = cx25821_format_by_fourcc(f->fmt.pix.pixelformat); fh->vidq.field = f->fmt.pix.field; /* check if width and height is valid based on set standard */ @@ -1119,7 +1119,7 @@ int cx25821_vidioc_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_fo enum v4l2_field field; unsigned int maxw, maxh; - fmt = format_by_fourcc(f->fmt.pix.pixelformat); + fmt = cx25821_format_by_fourcc(f->fmt.pix.pixelformat); if (NULL == fmt) return -EINVAL; diff --git a/drivers/staging/cx25821/cx25821-video.h b/drivers/staging/cx25821/cx25821-video.h index cc6034b1a95d..a2415d33235b 100644 --- a/drivers/staging/cx25821/cx25821-video.h +++ b/drivers/staging/cx25821/cx25821-video.h @@ -87,7 +87,7 @@ extern unsigned int vid_limit; #define FORMAT_FLAGS_PACKED 0x01 extern struct cx25821_fmt formats[]; -extern struct cx25821_fmt *format_by_fourcc(unsigned int fourcc); +extern struct cx25821_fmt *cx25821_format_by_fourcc(unsigned int fourcc); extern struct cx25821_data timeout_data[MAX_VID_CHANNEL_NUM]; extern void cx25821_dump_video_queue(struct cx25821_dev *dev, diff --git a/include/media/saa7146.h b/include/media/saa7146.h index 7a9f76ecbbbd..ac7ce00f39cf 100644 --- a/include/media/saa7146.h +++ b/include/media/saa7146.h @@ -161,7 +161,7 @@ extern struct list_head saa7146_devices; extern struct mutex saa7146_devices_lock; int saa7146_register_extension(struct saa7146_extension*); int saa7146_unregister_extension(struct saa7146_extension*); -struct saa7146_format* format_by_fourcc(struct saa7146_dev *dev, int fourcc); +struct saa7146_format* saa7146_format_by_fourcc(struct saa7146_dev *dev, int fourcc); int saa7146_pgtable_alloc(struct pci_dev *pci, struct saa7146_pgtable *pt); void saa7146_pgtable_free(struct pci_dev *pci, struct saa7146_pgtable *pt); int saa7146_pgtable_build_single(struct pci_dev *pci, struct saa7146_pgtable *pt, struct scatterlist *list, int length ); -- cgit v1.2.3-70-g09d2 From 6072d13c429373c5d63b69dadbbef40a9b035552 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 Dec 2010 13:35:19 -0500 Subject: Call the filesystem back whenever a page is removed from the page cache NFS needs to be able to release objects that are stored in the page cache once the page itself is no longer visible from the page cache. This patch adds a callback to the address space operations that allows filesystems to perform page cleanups once the page has been removed from the page cache. Original patch by: Linus Torvalds [trondmy: cover the cases of invalidate_inode_pages2() and truncate_inode_pages()] Signed-off-by: Trond Myklebust --- Documentation/filesystems/Locking | 7 ++++++- Documentation/filesystems/vfs.txt | 7 +++++++ include/linux/fs.h | 1 + mm/filemap.c | 5 +++++ mm/truncate.c | 4 ++++ mm/vmscan.c | 7 +++++++ 6 files changed, 30 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index a91f30890011..b6426f15b4ae 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -173,12 +173,13 @@ prototypes: sector_t (*bmap)(struct address_space *, sector_t); int (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, int); + void (*freepage)(struct page *); int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); int (*launder_page) (struct page *); locking rules: - All except set_page_dirty may block + All except set_page_dirty and freepage may block BKL PageLocked(page) i_mutex writepage: no yes, unlocks (see below) @@ -193,6 +194,7 @@ perform_write: no n/a yes bmap: no invalidatepage: no yes releasepage: no yes +freepage: no yes direct_IO: no launder_page: no yes @@ -288,6 +290,9 @@ buffers from the page in preparation for freeing it. It returns zero to indicate that the buffers are (or may be) freeable. If ->releasepage is zero, the kernel assumes that the fs has no private interest in the buffers. + ->freepage() is called when the kernel is done dropping the page +from the page cache. + ->launder_page() may be called prior to releasing a page if it is still found to be dirty. It returns zero if the page was successfully cleaned, or an error value if not. Note that in order to prevent the page diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index ed7e5efc06d8..3b14a557eca6 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -534,6 +534,7 @@ struct address_space_operations { sector_t (*bmap)(struct address_space *, sector_t); int (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, int); + void (*freepage)(struct page *); ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); struct page* (*get_xip_page)(struct address_space *, sector_t, @@ -679,6 +680,12 @@ struct address_space_operations { need to ensure this. Possibly it can clear the PageUptodate bit if it cannot free private data yet. + freepage: freepage is called once the page is no longer visible in + the page cache in order to allow the cleanup of any private + data. Since it may be called by the memory reclaimer, it + should not assume that the original address_space mapping still + exists, and it should not block. + direct_IO: called by the generic read/write routines to perform direct_IO - that is IO requests which bypass the page cache and transfer data directly between the storage and the diff --git a/include/linux/fs.h b/include/linux/fs.h index c9e06cc70dad..090f0eacde29 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -602,6 +602,7 @@ struct address_space_operations { sector_t (*bmap)(struct address_space *, sector_t); void (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, gfp_t); + void (*freepage)(struct page *); ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); int (*get_xip_mem)(struct address_space *, pgoff_t, int, diff --git a/mm/filemap.c b/mm/filemap.c index ea89840fc65f..6b9aee20f242 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -143,13 +143,18 @@ void __remove_from_page_cache(struct page *page) void remove_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; + void (*freepage)(struct page *); BUG_ON(!PageLocked(page)); + freepage = mapping->a_ops->freepage; spin_lock_irq(&mapping->tree_lock); __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); mem_cgroup_uncharge_cache_page(page); + + if (freepage) + freepage(page); } EXPORT_SYMBOL(remove_from_page_cache); diff --git a/mm/truncate.c b/mm/truncate.c index ba887bff48c5..3c2d5ddfa0d4 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -390,6 +390,10 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); mem_cgroup_uncharge_cache_page(page); + + if (mapping->a_ops->freepage) + mapping->a_ops->freepage(page); + page_cache_release(page); /* pagecache ref */ return 1; failed: diff --git a/mm/vmscan.c b/mm/vmscan.c index d31d7ce52c0e..9ca587c69274 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -494,9 +494,16 @@ static int __remove_mapping(struct address_space *mapping, struct page *page) spin_unlock_irq(&mapping->tree_lock); swapcache_free(swap, page); } else { + void (*freepage)(struct page *); + + freepage = mapping->a_ops->freepage; + __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); mem_cgroup_uncharge_cache_page(page); + + if (freepage != NULL) + freepage(page); } return 1; -- cgit v1.2.3-70-g09d2 From 11de3b11e08cac26d59e88efaf4e316701883552 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 Dec 2010 14:17:06 -0500 Subject: NFS: Fix a memory leak in nfs_readdir We need to ensure that the entries in the nfs_cache_array get cleared when the page is removed from the page cache. To do so, we use the freepage address_space operation. Change nfs_readdir_clear_array to use kmap_atomic(), so that the function can be safely called from all contexts. Finally, modify the cache_page_release helper to call nfs_readdir_clear_array directly, when dealing with an anonymous page from 'uncached_readdir'. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 18 +++++++++--------- fs/nfs/inode.c | 1 + include/linux/nfs_fs.h | 1 + 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e03537ff9350..d529e0e99efa 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -57,7 +57,7 @@ static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); static int nfs_fsync_dir(struct file *, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); -static int nfs_readdir_clear_array(struct page*, gfp_t); +static void nfs_readdir_clear_array(struct page*); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, @@ -83,8 +83,8 @@ const struct inode_operations nfs_dir_inode_operations = { .setattr = nfs_setattr, }; -const struct address_space_operations nfs_dir_addr_space_ops = { - .releasepage = nfs_readdir_clear_array, +const struct address_space_operations nfs_dir_aops = { + .freepage = nfs_readdir_clear_array, }; #ifdef CONFIG_NFS_V3 @@ -214,17 +214,15 @@ void nfs_readdir_release_array(struct page *page) * we are freeing strings created by nfs_add_to_readdir_array() */ static -int nfs_readdir_clear_array(struct page *page, gfp_t mask) +void nfs_readdir_clear_array(struct page *page) { - struct nfs_cache_array *array = nfs_readdir_get_array(page); + struct nfs_cache_array *array; int i; - if (IS_ERR(array)) - return PTR_ERR(array); + array = kmap_atomic(page, KM_USER0); for (i = 0; i < array->size; i++) kfree(array->array[i].string.name); - nfs_readdir_release_array(page); - return 0; + kunmap_atomic(array, KM_USER0); } /* @@ -639,6 +637,8 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) static void cache_page_release(nfs_readdir_descriptor_t *desc) { + if (!desc->page->mapping) + nfs_readdir_clear_array(desc->page); page_cache_release(desc->page); desc->page = NULL; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 314f57164602..e67e31c73416 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -289,6 +289,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) } else if (S_ISDIR(inode->i_mode)) { inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; + inode->i_data.a_ops = &nfs_dir_aops; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); /* Deal with crossing mountpoints */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c66fdb7d6998..29d504d5d1c3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -401,6 +401,7 @@ extern const struct inode_operations nfs3_file_inode_operations; #endif /* CONFIG_NFS_V3 */ extern const struct file_operations nfs_file_operations; extern const struct address_space_operations nfs_file_aops; +extern const struct address_space_operations nfs_dir_aops; static inline struct nfs_open_context *nfs_file_open_context(struct file *filp) { -- cgit v1.2.3-70-g09d2 From 3fcc0afbb9c93f3599ba03273e59915670b6c2c2 Mon Sep 17 00:00:00 2001 From: Uk Kim Date: Sun, 5 Dec 2010 17:32:16 +0900 Subject: ASoC: Fix off by one error in WM8994 EQ register bank size Signed-off-by: Uk Kim Acked-by: Liam Girdwood Signed-off-by: Mark Brown Cc: stable@kernel.org --- include/linux/mfd/wm8994/pdata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h index 5c51f367c061..add8a1b8bcf0 100644 --- a/include/linux/mfd/wm8994/pdata.h +++ b/include/linux/mfd/wm8994/pdata.h @@ -29,7 +29,7 @@ struct wm8994_ldo_pdata { #define WM8994_CONFIGURE_GPIO 0x8000 #define WM8994_DRC_REGS 5 -#define WM8994_EQ_REGS 19 +#define WM8994_EQ_REGS 20 /** * DRC configurations are specified with a label and a set of register -- cgit v1.2.3-70-g09d2 From 46bcf14f44d8f31ecfdc8b6708ec15a3b33316d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Dec 2010 09:29:43 -0800 Subject: filter: fix sk_filter rcu handling Pavel Emelyanov tried to fix a race between sk_filter_(de|at)tach and sk_clone() in commit 47e958eac280c263397 Problem is we can have several clones sharing a common sk_filter, and these clones might want to sk_filter_attach() their own filters at the same time, and can overwrite old_filter->rcu, corrupting RCU queues. We can not use filter->rcu without being sure no other thread could do the same thing. Switch code to a more conventional ref-counting technique : Do the atomic decrement immediately and queue one rcu call back when last reference is released. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 4 +++- net/core/filter.c | 19 ++++++------------- 2 files changed, 9 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index a6338d039857..659d968d95c5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1155,6 +1155,8 @@ extern void sk_common_release(struct sock *sk); /* Initialise core socket variables */ extern void sock_init_data(struct socket *sock, struct sock *sk); +extern void sk_filter_release_rcu(struct rcu_head *rcu); + /** * sk_filter_release - release a socket filter * @fp: filter to remove @@ -1165,7 +1167,7 @@ extern void sock_init_data(struct socket *sock, struct sock *sk); static inline void sk_filter_release(struct sk_filter *fp) { if (atomic_dec_and_test(&fp->refcnt)) - kfree(fp); + call_rcu_bh(&fp->rcu, sk_filter_release_rcu); } static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) diff --git a/net/core/filter.c b/net/core/filter.c index c1ee800bc080..ae21a0d3c4a2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -589,23 +589,16 @@ int sk_chk_filter(struct sock_filter *filter, int flen) EXPORT_SYMBOL(sk_chk_filter); /** - * sk_filter_rcu_release - Release a socket filter by rcu_head + * sk_filter_release_rcu - Release a socket filter by rcu_head * @rcu: rcu_head that contains the sk_filter to free */ -static void sk_filter_rcu_release(struct rcu_head *rcu) +void sk_filter_release_rcu(struct rcu_head *rcu) { struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); - sk_filter_release(fp); -} - -static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp) -{ - unsigned int size = sk_filter_len(fp); - - atomic_sub(size, &sk->sk_omem_alloc); - call_rcu_bh(&fp->rcu, sk_filter_rcu_release); + kfree(fp); } +EXPORT_SYMBOL(sk_filter_release_rcu); /** * sk_attach_filter - attach a socket filter @@ -649,7 +642,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) rcu_assign_pointer(sk->sk_filter, fp); if (old_fp) - sk_filter_delayed_uncharge(sk, old_fp); + sk_filter_uncharge(sk, old_fp); return 0; } EXPORT_SYMBOL_GPL(sk_attach_filter); @@ -663,7 +656,7 @@ int sk_detach_filter(struct sock *sk) sock_owned_by_user(sk)); if (filter) { rcu_assign_pointer(sk->sk_filter, NULL); - sk_filter_delayed_uncharge(sk, filter); + sk_filter_uncharge(sk, filter); ret = 0; } return ret; -- cgit v1.2.3-70-g09d2 From 88d60c32765716289abeb362c44adf6c35c6824c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 8 Nov 2010 18:19:22 -0500 Subject: fanotify: remove packed from access response message Since fanotify has decided to be careful about alignment and packing rather than rely on __attribute__((packed)) for multiarch support. Since this attribute isn't doing anything on fanotify_response we just drop it. This does not break API/ABI. Suggested-by: Tvrtko Ursulin Signed-off-by: Eric Paris --- include/linux/fanotify.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 0f0121467fc4..bdbf9bb29b54 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -96,7 +96,7 @@ struct fanotify_event_metadata { struct fanotify_response { __s32 fd; __u32 response; -} __attribute__ ((packed)); +}; /* Legit userspace responses to a _PERM event */ #define FAN_ALLOW 0x01 -- cgit v1.2.3-70-g09d2 From b1085ba80cd2784400a7beec3fda5099198ed01c Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 5 Nov 2010 17:05:27 +0100 Subject: fanotify: if set by user unset FMODE_NONOTIFY before fsnotify_perm() is called Unsetting FMODE_NONOTIFY in fsnotify_open() is too late, since fsnotify_perm() is called before. If FMODE_NONOTIFY is set fsnotify_perm() will skip permission checks, so a user can still disable permission checks by setting this flag in an open() call. This patch corrects this by unsetting the flag before fsnotify_perm is called. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- fs/namei.c | 3 +++ include/linux/fsnotify.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index 5362af9b7372..4ff7ca530533 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1748,6 +1748,9 @@ struct file *do_filp_open(int dfd, const char *pathname, if (!(open_flag & O_CREAT)) mode = 0; + /* Must never be set by userspace */ + open_flag &= ~FMODE_NONOTIFY; + /* * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only * check for O_DSYNC if the need any syncing at all we enforce it's diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 5c185fa27089..b10bcdeaef76 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -235,9 +235,6 @@ static inline void fsnotify_open(struct file *file) if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; - /* FMODE_NONOTIFY must never be set from user */ - file->f_mode &= ~FMODE_NONOTIFY; - fsnotify_parent(path, NULL, mask); fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); } -- cgit v1.2.3-70-g09d2 From 09e5f14e57c70f9d357862bb56e57026c51092a1 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 19 Nov 2010 10:58:07 +0100 Subject: fanotify: on group destroy allow all waiters to bypass permission check When fanotify_release() is called, there may still be processes waiting for access permission. Currently only processes for which an event has already been queued into the groups access list will be woken up. Processes for which no event has been queued will continue to sleep and thus cause a deadlock when fsnotify_put_group() is called. Furthermore there is a race allowing further processes to be waiting on the access wait queue after wake_up (if they arrive before clear_marks_by_group() is called). This patch corrects this by setting a flag to inform processes that the group is about to be destroyed and thus not to wait for access permission. [additional changelog from eparis] Lets think about the 4 relevant code paths from the PoV of the 'operator' 'listener' 'responder' and 'closer'. Where operator is the process doing an action (like open/read) which could require permission. Listener is the task (or in this case thread) slated with reading from the fanotify file descriptor. The 'responder' is the thread responsible for responding to access requests. 'Closer' is the thread attempting to close the fanotify file descriptor. The 'operator' is going to end up in: fanotify_handle_event() get_response_from_access() (THIS BLOCKS WAITING ON USERSPACE) The 'listener' interesting code path fanotify_read() copy_event_to_user() prepare_for_access_response() (THIS CREATES AN fanotify_response_event) The 'responder' code path: fanotify_write() process_access_response() (REMOVE A fanotify_response_event, SET RESPONSE, WAKE UP 'operator') The 'closer': fanotify_release() (SUPPOSED TO CLEAN UP THE REST OF THIS MESS) What we have today is that in the closer we remove all of the fanotify_response_events and set a bit so no more response events are ever created in prepare_for_access_response(). The bug is that we never wake all of the operators up and tell them to move along. You fix that in fanotify_get_response_from_access(). You also fix other operators which haven't gotten there yet. So I agree that's a good fix. [/additional changelog from eparis] [remove additional changes to minimize patch size] [move initialization so it was inside CONFIG_FANOTIFY_PERMISSION] Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.c | 6 +++++- fs/notify/fanotify/fanotify_user.c | 5 +++-- include/linux/fsnotify_backend.h | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index b04f88eed09e..f35794b97e8e 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -92,7 +92,11 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - wait_event(group->fanotify_data.access_waitq, event->response); + wait_event(group->fanotify_data.access_waitq, event->response || + atomic_read(&group->fanotify_data.bypass_perm)); + + if (!event->response) /* bypass_perm set */ + return 0; /* userspace responded, convert to something usable */ spin_lock(&event->lock); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 480434c5ee5f..01fffe62a2d4 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -200,7 +200,7 @@ static int prepare_for_access_response(struct fsnotify_group *group, mutex_lock(&group->fanotify_data.access_mutex); - if (group->fanotify_data.bypass_perm) { + if (atomic_read(&group->fanotify_data.bypass_perm)) { mutex_unlock(&group->fanotify_data.access_mutex); kmem_cache_free(fanotify_response_event_cache, re); event->response = FAN_ALLOW; @@ -390,7 +390,7 @@ static int fanotify_release(struct inode *ignored, struct file *file) mutex_lock(&group->fanotify_data.access_mutex); - group->fanotify_data.bypass_perm = true; + atomic_inc(&group->fanotify_data.bypass_perm); list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, @@ -703,6 +703,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) mutex_init(&group->fanotify_data.access_mutex); init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); + atomic_set(&group->fanotify_data.bypass_perm, 0); #endif switch (flags & FAN_ALL_CLASS_BITS) { case FAN_CLASS_NOTIF: diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 0a68f924f06f..7380763595d3 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -166,7 +166,7 @@ struct fsnotify_group { struct mutex access_mutex; struct list_head access_list; wait_queue_head_t access_waitq; - bool bypass_perm; /* protected by access_mutex */ + atomic_t bypass_perm; #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; unsigned int max_marks; -- cgit v1.2.3-70-g09d2 From e9a3854fd4ff3907e6c200a3980e19365ee695e9 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Wed, 24 Nov 2010 18:22:09 +0100 Subject: fanotify: Introduce FAN_NOFD FAN_NOFD is used in fanotify events that do not provide an open file descriptor (like the overflow_event). Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fanotify.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index bdbf9bb29b54..c73224315aee 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -101,6 +101,8 @@ struct fanotify_response { /* Legit userspace responses to a _PERM event */ #define FAN_ALLOW 0x01 #define FAN_DENY 0x02 +/* No fd set in event */ +#define FAN_NOFD -1 /* Helper functions to deal with fanotify_event_metadata buffers */ #define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) -- cgit v1.2.3-70-g09d2 From 2df485a774ba59c3f43bfe84107672c1d9b731a0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 7 Dec 2010 22:39:17 -0500 Subject: nfs: remove extraneous and problematic calls to nfs_clear_request When a nfs_page is freed, nfs_free_request is called which also calls nfs_clear_request to clean out the lock and open contexts and free the pagecache page. However, a couple of places in the nfs code call nfs_clear_request themselves. What happens here if the refcount on the request is still high? We'll be releasing contexts and freeing pointers while the request is possibly still in use. Remove those bare calls to nfs_clear_context. That should only be done when the request is being freed. Note that when doing this, we need to watch out for tests of req->wb_page. Previously, nfs_set_page_tag_locked() and nfs_clear_page_tag_locked() would check the value of req->wb_page to figure out if the page is mapped into the nfsi->nfs_page_tree. We now indicate the page is mapped using the new bit PG_MAPPED in req->wb_flags . Reported-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 4 ++-- fs/nfs/read.c | 1 - fs/nfs/write.c | 3 ++- include/linux/nfs_page.h | 1 + 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 137b549e63db..b68536cc9046 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -115,7 +115,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req) { if (!nfs_lock_request_dontget(req)) return 0; - if (req->wb_page != NULL) + if (test_bit(PG_MAPPED, &req->wb_flags)) radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); return 1; } @@ -125,7 +125,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req) */ void nfs_clear_page_tag_locked(struct nfs_page *req) { - if (req->wb_page != NULL) { + if (test_bit(PG_MAPPED, &req->wb_flags)) { struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index e4b62c6f5a6e..aedcaa7f291f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -152,7 +152,6 @@ static void nfs_readpage_release(struct nfs_page *req) (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); - nfs_clear_request(req); nfs_release_request(req); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4c14c17a5276..10d648ea128b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -390,6 +390,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } + set_bit(PG_MAPPED, &req->wb_flags); SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); nfsi->npages++; @@ -415,6 +416,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) spin_lock(&inode->i_lock); set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); + clear_bit(PG_MAPPED, &req->wb_flags); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; if (!nfsi->npages) { @@ -422,7 +424,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) iput(inode); } else spin_unlock(&inode->i_lock); - nfs_clear_request(req); nfs_release_request(req); } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index f8b60e7f4c44..d55cee73f634 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -29,6 +29,7 @@ */ enum { PG_BUSY = 0, + PG_MAPPED, PG_CLEAN, PG_NEED_COMMIT, PG_NEED_RESCHED, -- cgit v1.2.3-70-g09d2 From 0f004f5a696a9434b7214d0d3cbd0525ee77d428 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 30 Nov 2010 19:48:45 +0100 Subject: sched: Cure more NO_HZ load average woes There's a long-running regression that proved difficult to fix and which is hitting certain people and is rather annoying in its effects. Damien reported that after 74f5187ac8 (sched: Cure load average vs NO_HZ woes) his load average is unnaturally high, he also noted that even with that patch reverted the load avgerage numbers are not correct. The problem is that the previous patch only solved half the NO_HZ problem, it addressed the part of going into NO_HZ mode, not of comming out of NO_HZ mode. This patch implements that missing half. When comming out of NO_HZ mode there are two important things to take care of: - Folding the pending idle delta into the global active count. - Correctly aging the averages for the idle-duration. So with this patch the NO_HZ interaction should be complete and behaviour between CONFIG_NO_HZ=[yn] should be equivalent. Furthermore, this patch slightly changes the load average computation by adding a rounding term to the fixed point multiplication. Reported-by: Damien Wyart Reported-by: Tim McGrath Tested-by: Damien Wyart Tested-by: Orion Poplawski Tested-by: Kyle McMartin Signed-off-by: Peter Zijlstra Cc: stable@kernel.org Cc: Chase Douglas LKML-Reference: <1291129145.32004.874.camel@laptop> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/sched.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++---- kernel/timer.c | 2 +- 3 files changed, 141 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c79e921a68b..223874538b33 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -143,7 +143,7 @@ extern unsigned long nr_iowait_cpu(int cpu); extern unsigned long this_cpu_load(void); -extern void calc_global_load(void); +extern void calc_global_load(unsigned long ticks); extern unsigned long get_parent_ip(unsigned long addr); diff --git a/kernel/sched.c b/kernel/sched.c index dc91a4d09ac3..6b7c26a1a097 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3119,6 +3119,15 @@ static long calc_load_fold_active(struct rq *this_rq) return delta; } +static unsigned long +calc_load(unsigned long load, unsigned long exp, unsigned long active) +{ + load *= exp; + load += active * (FIXED_1 - exp); + load += 1UL << (FSHIFT - 1); + return load >> FSHIFT; +} + #ifdef CONFIG_NO_HZ /* * For NO_HZ we delay the active fold to the next LOAD_FREQ update. @@ -3148,6 +3157,128 @@ static long calc_load_fold_idle(void) return delta; } + +/** + * fixed_power_int - compute: x^n, in O(log n) time + * + * @x: base of the power + * @frac_bits: fractional bits of @x + * @n: power to raise @x to. + * + * By exploiting the relation between the definition of the natural power + * function: x^n := x*x*...*x (x multiplied by itself for n times), and + * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i, + * (where: n_i \elem {0, 1}, the binary vector representing n), + * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is + * of course trivially computable in O(log_2 n), the length of our binary + * vector. + */ +static unsigned long +fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) +{ + unsigned long result = 1UL << frac_bits; + + if (n) for (;;) { + if (n & 1) { + result *= x; + result += 1UL << (frac_bits - 1); + result >>= frac_bits; + } + n >>= 1; + if (!n) + break; + x *= x; + x += 1UL << (frac_bits - 1); + x >>= frac_bits; + } + + return result; +} + +/* + * a1 = a0 * e + a * (1 - e) + * + * a2 = a1 * e + a * (1 - e) + * = (a0 * e + a * (1 - e)) * e + a * (1 - e) + * = a0 * e^2 + a * (1 - e) * (1 + e) + * + * a3 = a2 * e + a * (1 - e) + * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e) + * = a0 * e^3 + a * (1 - e) * (1 + e + e^2) + * + * ... + * + * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1] + * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e) + * = a0 * e^n + a * (1 - e^n) + * + * [1] application of the geometric series: + * + * n 1 - x^(n+1) + * S_n := \Sum x^i = ------------- + * i=0 1 - x + */ +static unsigned long +calc_load_n(unsigned long load, unsigned long exp, + unsigned long active, unsigned int n) +{ + + return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); +} + +/* + * NO_HZ can leave us missing all per-cpu ticks calling + * calc_load_account_active(), but since an idle CPU folds its delta into + * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold + * in the pending idle delta if our idle period crossed a load cycle boundary. + * + * Once we've updated the global active value, we need to apply the exponential + * weights adjusted to the number of cycles missed. + */ +static void calc_global_nohz(unsigned long ticks) +{ + long delta, active, n; + + if (time_before(jiffies, calc_load_update)) + return; + + /* + * If we crossed a calc_load_update boundary, make sure to fold + * any pending idle changes, the respective CPUs might have + * missed the tick driven calc_load_account_active() update + * due to NO_HZ. + */ + delta = calc_load_fold_idle(); + if (delta) + atomic_long_add(delta, &calc_load_tasks); + + /* + * If we were idle for multiple load cycles, apply them. + */ + if (ticks >= LOAD_FREQ) { + n = ticks / LOAD_FREQ; + + active = atomic_long_read(&calc_load_tasks); + active = active > 0 ? active * FIXED_1 : 0; + + avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); + avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); + avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); + + calc_load_update += n * LOAD_FREQ; + } + + /* + * Its possible the remainder of the above division also crosses + * a LOAD_FREQ period, the regular check in calc_global_load() + * which comes after this will take care of that. + * + * Consider us being 11 ticks before a cycle completion, and us + * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will + * age us 4 cycles, and the test in calc_global_load() will + * pick up the final one. + */ +} #else static void calc_load_account_idle(struct rq *this_rq) { @@ -3157,6 +3288,10 @@ static inline long calc_load_fold_idle(void) { return 0; } + +static void calc_global_nohz(unsigned long ticks) +{ +} #endif /** @@ -3174,24 +3309,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) loads[2] = (avenrun[2] + offset) << shift; } -static unsigned long -calc_load(unsigned long load, unsigned long exp, unsigned long active) -{ - load *= exp; - load += active * (FIXED_1 - exp); - return load >> FSHIFT; -} - /* * calc_load - update the avenrun load estimates 10 ticks after the * CPUs have updated calc_load_tasks. */ -void calc_global_load(void) +void calc_global_load(unsigned long ticks) { - unsigned long upd = calc_load_update + 10; long active; - if (time_before(jiffies, upd)) + calc_global_nohz(ticks); + + if (time_before(jiffies, calc_load_update + 10)) return; active = atomic_long_read(&calc_load_tasks); diff --git a/kernel/timer.c b/kernel/timer.c index 68a9ae7679b7..7bd715fda974 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1319,7 +1319,7 @@ void do_timer(unsigned long ticks) { jiffies_64 += ticks; update_wall_time(); - calc_global_load(); + calc_global_load(ticks); } #ifdef __ARCH_WANT_SYS_ALARM -- cgit v1.2.3-70-g09d2 From 67631510a318d5a930055fe927607f483716e100 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 8 Dec 2010 12:16:33 -0800 Subject: tcp: Replace time wait bucket msg by counter Rather than printing the message to the log, use a mib counter to keep track of the count of occurences of time wait bucket overflow. Reduces spam in logs. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/snmp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/tcp_minisocks.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index ebb0c80ffd6e..12b2b18e50c1 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -230,6 +230,7 @@ enum LINUX_MIB_TCPMINTTLDROP, /* RFC 5082 */ LINUX_MIB_TCPDEFERACCEPTDROP, LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */ + LINUX_MIB_TCPTIMEWAITOVERFLOW, /* TCPTimeWaitOverflow */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 1b48eb1ed453..b14ec7d03b6e 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -253,6 +253,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), + SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 43cf901d7659..a66735f75963 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -347,7 +347,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) * socket up. We've got bigger problems than * non-graceful socket closings. */ - LIMIT_NETDEBUG(KERN_INFO "TCP: time wait bucket table overflow\n"); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); } tcp_update_metrics(sk); -- cgit v1.2.3-70-g09d2 From d9ca676bcb26e1fdff9265a3e70f697cd381c889 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 8 Dec 2010 19:40:47 +0000 Subject: atm: correct sysfs 'device' link creation and parent relationships The ATM subsystem was incorrectly creating the 'device' link for ATM nodes in sysfs. This led to incorrect device/parent relationships exposed by sysfs and udev. Instead of rolling the 'device' link by hand in the generic ATM code, pass each ATM driver's bus device down to the sysfs code and let sysfs do this stuff correctly. Signed-off-by: Dan Williams Signed-off-by: David S. Miller --- drivers/atm/adummy.c | 2 +- drivers/atm/ambassador.c | 3 ++- drivers/atm/atmtcp.c | 2 +- drivers/atm/eni.c | 2 +- drivers/atm/firestream.c | 2 +- drivers/atm/fore200e.c | 14 +++++++------- drivers/atm/he.c | 2 +- drivers/atm/horizon.c | 3 ++- drivers/atm/idt77252.c | 3 ++- drivers/atm/iphase.c | 2 +- drivers/atm/lanai.c | 2 +- drivers/atm/nicstar.c | 3 ++- drivers/atm/solos-pci.c | 8 ++++---- drivers/atm/zatm.c | 2 +- drivers/usb/atm/usbatm.c | 15 +++------------ include/linux/atmdev.h | 6 ++++-- net/atm/atm_sysfs.c | 3 ++- net/atm/resources.c | 7 ++++--- net/atm/resources.h | 2 +- 19 files changed, 41 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/drivers/atm/adummy.c b/drivers/atm/adummy.c index 46b94762125b..f9b983ae6877 100644 --- a/drivers/atm/adummy.c +++ b/drivers/atm/adummy.c @@ -154,7 +154,7 @@ static int __init adummy_init(void) err = -ENOMEM; goto out; } - atm_dev = atm_dev_register(DEV_LABEL, &adummy_ops, -1, NULL); + atm_dev = atm_dev_register(DEV_LABEL, NULL, &adummy_ops, -1, NULL); if (!atm_dev) { printk(KERN_ERR DEV_LABEL ": atm_dev_register() failed\n"); err = -ENODEV; diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index a33896a482e6..ffe9b655292e 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -2244,7 +2244,8 @@ static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_ goto out_reset; } - dev->atm_dev = atm_dev_register (DEV_LABEL, &amb_ops, -1, NULL); + dev->atm_dev = atm_dev_register (DEV_LABEL, &pci_dev->dev, &amb_ops, -1, + NULL); if (!dev->atm_dev) { PRINTD (DBG_ERR, "failed to register Madge ATM adapter"); err = -EINVAL; diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index b9101818b47b..2b464b631f22 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -366,7 +366,7 @@ static int atmtcp_create(int itf,int persist,struct atm_dev **result) if (!dev_data) return -ENOMEM; - dev = atm_dev_register(DEV_LABEL,&atmtcp_v_dev_ops,itf,NULL); + dev = atm_dev_register(DEV_LABEL,NULL,&atmtcp_v_dev_ops,itf,NULL); if (!dev) { kfree(dev_data); return itf == -1 ? -ENOMEM : -EBUSY; diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 97c5898cd76e..c495fae74200 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -2244,7 +2244,7 @@ static int __devinit eni_init_one(struct pci_dev *pci_dev, &zeroes); if (!cpu_zeroes) goto out1; } - dev = atm_dev_register(DEV_LABEL,&ops,-1,NULL); + dev = atm_dev_register(DEV_LABEL, &pci_dev->dev, &ops, -1, NULL); if (!dev) goto out2; pci_set_drvdata(pci_dev, dev); eni_dev->pci_dev = pci_dev; diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 5d86bb803e94..7d912baf01d4 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -1911,7 +1911,7 @@ static int __devinit firestream_init_one (struct pci_dev *pci_dev, fs_dev, sizeof (struct fs_dev)); if (!fs_dev) goto err_out; - atm_dev = atm_dev_register("fs", &ops, -1, NULL); + atm_dev = atm_dev_register("fs", &pci_dev->dev, &ops, -1, NULL); if (!atm_dev) goto err_out_free_fs_dev; diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index c8fc69c85a06..962c309b40c0 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -2567,14 +2567,14 @@ release: static int __devinit -fore200e_register(struct fore200e* fore200e) +fore200e_register(struct fore200e* fore200e, struct device *parent) { struct atm_dev* atm_dev; DPRINTK(2, "device %s being registered\n", fore200e->name); - atm_dev = atm_dev_register(fore200e->bus->proc_name, &fore200e_ops, -1, - NULL); + atm_dev = atm_dev_register(fore200e->bus->proc_name, parent, &fore200e_ops, + -1, NULL); if (atm_dev == NULL) { printk(FORE200E "unable to register device %s\n", fore200e->name); return -ENODEV; @@ -2594,9 +2594,9 @@ fore200e_register(struct fore200e* fore200e) static int __devinit -fore200e_init(struct fore200e* fore200e) +fore200e_init(struct fore200e* fore200e, struct device *parent) { - if (fore200e_register(fore200e) < 0) + if (fore200e_register(fore200e, parent) < 0) return -ENODEV; if (fore200e->bus->configure(fore200e) < 0) @@ -2662,7 +2662,7 @@ static int __devinit fore200e_sba_probe(struct platform_device *op, sprintf(fore200e->name, "%s-%d", bus->model_name, index); - err = fore200e_init(fore200e); + err = fore200e_init(fore200e, &op->dev); if (err < 0) { fore200e_shutdown(fore200e); kfree(fore200e); @@ -2740,7 +2740,7 @@ fore200e_pca_detect(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent sprintf(fore200e->name, "%s-%d", bus->model_name, index); - err = fore200e_init(fore200e); + err = fore200e_init(fore200e, &pci_dev->dev); if (err < 0) { fore200e_shutdown(fore200e); goto out_free; diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 801e8b6e9d1f..6cf59bf281dc 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -366,7 +366,7 @@ he_init_one(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent) goto init_one_failure; } - atm_dev = atm_dev_register(DEV_LABEL, &he_ops, -1, NULL); + atm_dev = atm_dev_register(DEV_LABEL, &pci_dev->dev, &he_ops, -1, NULL); if (!atm_dev) { err = -ENODEV; goto init_one_failure; diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index a95790452a68..24761e1d6642 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -2733,7 +2733,8 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_ PRINTD(DBG_INFO, "found Madge ATM adapter (hrz) at: IO %x, IRQ %u, MEM %p", iobase, irq, membase); - dev->atm_dev = atm_dev_register(DEV_LABEL, &hrz_ops, -1, NULL); + dev->atm_dev = atm_dev_register(DEV_LABEL, &pci_dev->dev, &hrz_ops, -1, + NULL); if (!(dev->atm_dev)) { PRINTD(DBG_ERR, "failed to register Madge ATM adapter"); err = -EINVAL; diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index bce57328ddde..bfb7feee0400 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3698,7 +3698,8 @@ idt77252_init_one(struct pci_dev *pcidev, const struct pci_device_id *id) goto err_out_iounmap; } - dev = atm_dev_register("idt77252", &idt77252_ops, -1, NULL); + dev = atm_dev_register("idt77252", &pcidev->dev, &idt77252_ops, -1, + NULL); if (!dev) { printk("%s: can't register atm device\n", card->name); err = -EIO; diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index 9309d4724e13..729254053758 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -3172,7 +3172,7 @@ static int __devinit ia_init_one(struct pci_dev *pdev, ret = -ENODEV; goto err_out_free_iadev; } - dev = atm_dev_register(DEV_LABEL, &ops, -1, NULL); + dev = atm_dev_register(DEV_LABEL, &pdev->dev, &ops, -1, NULL); if (!dev) { ret = -ENOMEM; goto err_out_disable_dev; diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index cbe15a86c669..a395c9aab146 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -2591,7 +2591,7 @@ static int __devinit lanai_init_one(struct pci_dev *pci, return -ENOMEM; } - atmdev = atm_dev_register(DEV_LABEL, &ops, -1, NULL); + atmdev = atm_dev_register(DEV_LABEL, &pci->dev, &ops, -1, NULL); if (atmdev == NULL) { printk(KERN_ERR DEV_LABEL ": couldn't register atm device!\n"); diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 2f3516b7f118..6b313ee9231b 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -771,7 +771,8 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) } /* Register device */ - card->atmdev = atm_dev_register("nicstar", &atm_ops, -1, NULL); + card->atmdev = atm_dev_register("nicstar", &card->pcidev->dev, &atm_ops, + -1, NULL); if (card->atmdev == NULL) { printk("nicstar%d: can't register device.\n", i); error = 17; diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 2e08c996fd30..73fb1c4f4cd4 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -166,7 +166,7 @@ static irqreturn_t solos_irq(int irq, void *dev_id); static struct atm_vcc* find_vcc(struct atm_dev *dev, short vpi, int vci); static int list_vccs(int vci); static void release_vccs(struct atm_dev *dev); -static int atm_init(struct solos_card *); +static int atm_init(struct solos_card *, struct device *); static void atm_remove(struct solos_card *); static int send_command(struct solos_card *card, int dev, const char *buf, size_t size); static void solos_bh(unsigned long); @@ -1210,7 +1210,7 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) if (db_firmware_upgrade) flash_upgrade(card, 3); - err = atm_init(card); + err = atm_init(card, &dev->dev); if (err) goto out_free_irq; @@ -1233,7 +1233,7 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) return err; } -static int atm_init(struct solos_card *card) +static int atm_init(struct solos_card *card, struct device *parent) { int i; @@ -1244,7 +1244,7 @@ static int atm_init(struct solos_card *card) skb_queue_head_init(&card->tx_queue[i]); skb_queue_head_init(&card->cli_queue[i]); - card->atmdev[i] = atm_dev_register("solos-pci", &fpga_ops, -1, NULL); + card->atmdev[i] = atm_dev_register("solos-pci", parent, &fpga_ops, -1, NULL); if (!card->atmdev[i]) { dev_err(&card->dev->dev, "Could not register ATM device %d\n", i); atm_remove(card); diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 4e885d2da49c..624917902b65 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -1597,7 +1597,7 @@ static int __devinit zatm_init_one(struct pci_dev *pci_dev, goto out; } - dev = atm_dev_register(DEV_LABEL, &ops, -1, NULL); + dev = atm_dev_register(DEV_LABEL, &pci_dev->dev, &ops, -1, NULL); if (!dev) goto out_free; diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c index 05bf5a27b5b0..989e16e4ab5c 100644 --- a/drivers/usb/atm/usbatm.c +++ b/drivers/usb/atm/usbatm.c @@ -951,7 +951,9 @@ static int usbatm_atm_init(struct usbatm_data *instance) * condition: callbacks we register can be executed at once, before we have * initialized the struct atm_dev. To protect against this, all callbacks * abort if atm_dev->dev_data is NULL. */ - atm_dev = atm_dev_register(instance->driver_name, &usbatm_atm_devops, -1, NULL); + atm_dev = atm_dev_register(instance->driver_name, + &instance->usb_intf->dev, &usbatm_atm_devops, + -1, NULL); if (!atm_dev) { usb_err(instance, "%s: failed to register ATM device!\n", __func__); return -1; @@ -966,14 +968,6 @@ static int usbatm_atm_init(struct usbatm_data *instance) /* temp init ATM device, set to 128kbit */ atm_dev->link_rate = 128 * 1000 / 424; - ret = sysfs_create_link(&atm_dev->class_dev.kobj, - &instance->usb_intf->dev.kobj, "device"); - if (ret) { - atm_err(instance, "%s: sysfs_create_link failed: %d\n", - __func__, ret); - goto fail_sysfs; - } - if (instance->driver->atm_start && ((ret = instance->driver->atm_start(instance, atm_dev)) < 0)) { atm_err(instance, "%s: atm_start failed: %d!\n", __func__, ret); goto fail; @@ -992,8 +986,6 @@ static int usbatm_atm_init(struct usbatm_data *instance) return 0; fail: - sysfs_remove_link(&atm_dev->class_dev.kobj, "device"); - fail_sysfs: instance->atm_dev = NULL; atm_dev_deregister(atm_dev); /* usbatm_atm_dev_close will eventually be called */ return ret; @@ -1329,7 +1321,6 @@ void usbatm_usb_disconnect(struct usb_interface *intf) /* ATM finalize */ if (instance->atm_dev) { - sysfs_remove_link(&instance->atm_dev->class_dev.kobj, "device"); atm_dev_deregister(instance->atm_dev); instance->atm_dev = NULL; } diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index a8e4e832cdbb..475f8c42c0e9 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -427,8 +427,10 @@ extern rwlock_t vcc_sklist_lock; #define ATM_SKB(skb) (((struct atm_skb_data *) (skb)->cb)) -struct atm_dev *atm_dev_register(const char *type,const struct atmdev_ops *ops, - int number,unsigned long *flags); /* number == -1: pick first available */ +struct atm_dev *atm_dev_register(const char *type, struct device *parent, + const struct atmdev_ops *ops, + int number, /* -1 == pick first available */ + unsigned long *flags); struct atm_dev *atm_dev_lookup(int number); void atm_dev_deregister(struct atm_dev *dev); diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index 799c631f0fed..f7fa67c78766 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -143,12 +143,13 @@ static struct class atm_class = { .dev_uevent = atm_uevent, }; -int atm_register_sysfs(struct atm_dev *adev) +int atm_register_sysfs(struct atm_dev *adev, struct device *parent) { struct device *cdev = &adev->class_dev; int i, j, err; cdev->class = &atm_class; + cdev->parent = parent; dev_set_drvdata(cdev, adev); dev_set_name(cdev, "%s%d", adev->type, adev->number); diff --git a/net/atm/resources.c b/net/atm/resources.c index d29e58261511..23f45ce6f351 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -74,8 +74,9 @@ struct atm_dev *atm_dev_lookup(int number) } EXPORT_SYMBOL(atm_dev_lookup); -struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops, - int number, unsigned long *flags) +struct atm_dev *atm_dev_register(const char *type, struct device *parent, + const struct atmdev_ops *ops, int number, + unsigned long *flags) { struct atm_dev *dev, *inuse; @@ -115,7 +116,7 @@ struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops, goto out_fail; } - if (atm_register_sysfs(dev) < 0) { + if (atm_register_sysfs(dev, parent) < 0) { pr_err("atm_register_sysfs failed for dev %s\n", type); atm_proc_dev_deregister(dev); goto out_fail; diff --git a/net/atm/resources.h b/net/atm/resources.h index 126fb1840dfb..521431e30507 100644 --- a/net/atm/resources.h +++ b/net/atm/resources.h @@ -42,6 +42,6 @@ static inline void atm_proc_dev_deregister(struct atm_dev *dev) #endif /* CONFIG_PROC_FS */ -int atm_register_sysfs(struct atm_dev *adev); +int atm_register_sysfs(struct atm_dev *adev, struct device *parent); void atm_unregister_sysfs(struct atm_dev *adev); #endif -- cgit v1.2.3-70-g09d2 From d90aa92c0c1625d7f02050e4d2924805840cda3d Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 9 Dec 2010 16:50:52 +0800 Subject: acpi: fix _OSI string setup regression commit b0ed7a91(ACPICA/ACPI: Add new host interfaces for _OSI suppor) introduced a regression that _OSI string setup fails. There are 2 paths to setup _OSI string. DMI: acpi_dmi_osi_linux -> set_osi_linux -> acpi_osi_setup -> copy _OSI string to osi_setup_string Boot command line: acpi_osi_setup -> copy _OSI string to osi_setup_string Later, acpi_osi_setup_late will be called to handle osi_setup_string. If _OSI string is "Linux" or "!Linux", then the call path is, acpi_osi_setup_late -> acpi_cmdline_osi_linux -> set_osi_linux -> acpi_osi_setup -> copy _OSI string to osi_setup_string This actually never installs _OSI string(acpi_install_interface not called), but just copy the _OSI string to osi_setup_string. This patch fixes the regression. Reported-and-tested-by: Lukas Hejtmanek Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/osl.c | 59 +++++++++++++++++++++++++++++----------------------- include/linux/acpi.h | 2 +- 2 files changed, 34 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 966feddf6b1b..6867443c4941 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -152,8 +152,7 @@ static struct osi_linux { unsigned int enable:1; unsigned int dmi:1; unsigned int cmdline:1; - unsigned int known:1; -} osi_linux = { 0, 0, 0, 0}; +} osi_linux = {0, 0, 0}; static u32 acpi_osi_handler(acpi_string interface, u32 supported) { @@ -1055,13 +1054,22 @@ static int __init acpi_os_name_setup(char *str) __setup("acpi_os_name=", acpi_os_name_setup); +void __init acpi_osi_setup(char *str) +{ + if (!acpi_gbl_create_osi_method) + return; + + if (str == NULL || *str == '\0') { + printk(KERN_INFO PREFIX "_OSI method disabled\n"); + acpi_gbl_create_osi_method = FALSE; + } else + strncpy(osi_setup_string, str, OSI_STRING_LENGTH_MAX); +} + static void __init set_osi_linux(unsigned int enable) { - if (osi_linux.enable != enable) { + if (osi_linux.enable != enable) osi_linux.enable = enable; - printk(KERN_NOTICE PREFIX "%sed _OSI(Linux)\n", - enable ? "Add": "Delet"); - } if (osi_linux.enable) acpi_osi_setup("Linux"); @@ -1073,7 +1081,8 @@ static void __init set_osi_linux(unsigned int enable) static void __init acpi_cmdline_osi_linux(unsigned int enable) { - osi_linux.cmdline = 1; /* cmdline set the default */ + osi_linux.cmdline = 1; /* cmdline set the default and override DMI */ + osi_linux.dmi = 0; set_osi_linux(enable); return; @@ -1081,15 +1090,12 @@ static void __init acpi_cmdline_osi_linux(unsigned int enable) void __init acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d) { - osi_linux.dmi = 1; /* DMI knows that this box asks OSI(Linux) */ - printk(KERN_NOTICE PREFIX "DMI detected: %s\n", d->ident); if (enable == -1) return; - osi_linux.known = 1; /* DMI knows which OSI(Linux) default needed */ - + osi_linux.dmi = 1; /* DMI knows that this box asks OSI(Linux) */ set_osi_linux(enable); return; @@ -1105,36 +1111,37 @@ void __init acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d) static void __init acpi_osi_setup_late(void) { char *str = osi_setup_string; + acpi_status status; if (*str == '\0') return; - if (!strcmp("!Linux", str)) { - acpi_cmdline_osi_linux(0); /* !enable */ - } else if (*str == '!') { - if (acpi_remove_interface(++str) == AE_OK) + if (*str == '!') { + status = acpi_remove_interface(++str); + + if (ACPI_SUCCESS(status)) printk(KERN_INFO PREFIX "Deleted _OSI(%s)\n", str); - } else if (!strcmp("Linux", str)) { - acpi_cmdline_osi_linux(1); /* enable */ } else { - if (acpi_install_interface(str) == AE_OK) + status = acpi_install_interface(str); + + if (ACPI_SUCCESS(status)) printk(KERN_INFO PREFIX "Added _OSI(%s)\n", str); } } -int __init acpi_osi_setup(char *str) +static int __init osi_setup(char *str) { - if (str == NULL || *str == '\0') { - printk(KERN_INFO PREFIX "_OSI method disabled\n"); - acpi_gbl_create_osi_method = FALSE; - } else { - strncpy(osi_setup_string, str, OSI_STRING_LENGTH_MAX); - } + if (str && !strcmp("Linux", str)) + acpi_cmdline_osi_linux(1); + else if (str && !strcmp("!Linux", str)) + acpi_cmdline_osi_linux(0); + else + acpi_osi_setup(str); return 1; } -__setup("acpi_osi=", acpi_osi_setup); +__setup("acpi_osi=", osi_setup); /* enable serialization to combat AE_ALREADY_EXISTS errors */ static int __init acpi_serialize_setup(char *str) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 050a7bccb836..67c91b4418b0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -219,7 +219,7 @@ static inline int acpi_video_display_switch_support(void) extern int acpi_blacklisted(void); extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d); -extern int acpi_osi_setup(char *str); +extern void acpi_osi_setup(char *str); #ifdef CONFIG_ACPI_NUMA int acpi_get_pxm(acpi_handle handle); -- cgit v1.2.3-70-g09d2 From 3353bebe7c89725ab2f476b9d8d618259402d52e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 30 Nov 2010 18:21:46 -0500 Subject: ACPI: video: fix build for VIDEO_OUTPUT_CONTROL=n drivers/built-in.o: In function `acpi_video_bus_put_devices': video.c:(.text+0x79663): undefined reference to `video_output_unregister' drivers/built-in.o: In function `acpi_video_bus_add': video.c:(.text+0x7b0b3): undefined reference to `video_output_register' Signed-off-by: Len Brown --- include/linux/video_output.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/video_output.h b/include/linux/video_output.h index 2fb46bc9340d..ed5cdeb3604d 100644 --- a/include/linux/video_output.h +++ b/include/linux/video_output.h @@ -23,6 +23,7 @@ #ifndef _LINUX_VIDEO_OUTPUT_H #define _LINUX_VIDEO_OUTPUT_H #include +#include struct output_device; struct output_properties { int (*set_state)(struct output_device *); @@ -34,9 +35,23 @@ struct output_device { struct device dev; }; #define to_output_device(obj) container_of(obj, struct output_device, dev) +#if defined(CONFIG_VIDEO_OUTPUT_CONTROL) || defined(CONFIG_VIDEO_OUTPUT_CONTROL_MODULE) struct output_device *video_output_register(const char *name, struct device *dev, void *devdata, struct output_properties *op); void video_output_unregister(struct output_device *dev); +#else +static struct output_device *video_output_register(const char *name, + struct device *dev, + void *devdata, + struct output_properties *op) +{ + return ERR_PTR(-ENODEV); +} +static void video_output_unregister(struct output_device *dev) +{ + return; +} +#endif #endif -- cgit v1.2.3-70-g09d2 From b72512ed706efb26087fcbbc5f98ed64ac1230d5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 5 Sep 2010 14:51:49 +0100 Subject: ACPI: video: fix build for CONFIG_ACPI=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In file included from drivers/gpu/drm/i915/intel_opregion.c:30: include/acpi/video.h:22: warning: ‘struct acpi_device’ declared inside parameter list ... include/acpi/video.h:24: error: ‘ENODEV’ undeclared (first use in this function) Signed-off-by: Chris Wilson Signed-off-by: Len Brown --- include/acpi/video.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/video.h b/include/acpi/video.h index 551793c9b6e8..0e98e679d3a7 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -1,6 +1,10 @@ #ifndef __ACPI_VIDEO_H #define __ACPI_VIDEO_H +#include /* for ENODEV */ + +struct acpi_device; + #define ACPI_VIDEO_DISPLAY_CRT 1 #define ACPI_VIDEO_DISPLAY_TV 2 #define ACPI_VIDEO_DISPLAY_DVI 3 @@ -26,4 +30,3 @@ static inline int acpi_video_get_edid(struct acpi_device *device, int type, #endif #endif - -- cgit v1.2.3-70-g09d2 From 53dde5f385bc56e312f78b7cb25ffaf8efd4735d Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 16 Nov 2010 13:23:50 -0800 Subject: bootmem: Add alloc_bootmem_align() Add an alloc_bootmem_align() interface to allocate bootmem with specified alignment. This is necessary to be able to allocate the xsave area in a subsequent patch. Signed-off-by: Suresh Siddha LKML-Reference: <20101116212441.977574826@sbsiddha-MOBL3.sc.intel.com> Acked-by: H. Peter Anvin Signed-off-by: H. Peter Anvin Cc: --- include/linux/bootmem.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 266ab9291232..499dfe982a0e 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -105,6 +105,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, #define alloc_bootmem(x) \ __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_align(x, align) \ + __alloc_bootmem(x, align, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_nopanic(x) \ __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages(x) \ -- cgit v1.2.3-70-g09d2 From ab4e0192196b8d4e43a3945742d4996da934a86f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 14 Dec 2010 23:53:21 -0800 Subject: Input: define separate EVIOCGKEYCODE_V2/EVIOCSKEYCODE_V2 The desire to keep old names for the EVIOCGKEYCODE/EVIOCSKEYCODE while extending them to support large scancodes was a mistake. While we tried to keep ABI intact (and we succeeded in doing that, programs compiled on older kernels will work on newer ones) there is still a problem with recompiling existing software with newer kernel headers. New kernel headers will supply updated ioctl numbers and kernel will expect that userspace will use struct input_keymap_entry to set and retrieve keymap data. But since the names of ioctls are still the same userspace will happily compile even if not adjusted to make use of the new structure and will start miraculously fail in the field. To avoid this issue let's revert EVIOCGKEYCODE/EVIOCSKEYCODE definitions and add EVIOCGKEYCODE_V2/EVIOCSKEYCODE_V2 so that userspace can explicitly select the style of ioctls it wants to employ. Reviewed-by: Henrik Rydberg Acked-by: Jarod Wilson Acked-by: Mauro Carvalho Chehab Signed-off-by: Dmitry Torokhov --- drivers/input/evdev.c | 113 ++++++++++++++++++++++++++------------------------ include/linux/input.h | 6 ++- 2 files changed, 62 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index e3f7fc6f9565..68f09a868434 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -534,76 +534,73 @@ static int handle_eviocgbit(struct input_dev *dev, } #undef OLD_KEY_MAX -static int evdev_handle_get_keycode(struct input_dev *dev, - void __user *p, size_t size) +static int evdev_handle_get_keycode(struct input_dev *dev, void __user *p) { - struct input_keymap_entry ke; + struct input_keymap_entry ke = { + .len = sizeof(unsigned int), + .flags = 0, + }; + int __user *ip = (int __user *)p; int error; - memset(&ke, 0, sizeof(ke)); - - if (size == sizeof(unsigned int[2])) { - /* legacy case */ - int __user *ip = (int __user *)p; + /* legacy case */ + if (copy_from_user(ke.scancode, p, sizeof(unsigned int))) + return -EFAULT; - if (copy_from_user(ke.scancode, p, sizeof(unsigned int))) - return -EFAULT; + error = input_get_keycode(dev, &ke); + if (error) + return error; - ke.len = sizeof(unsigned int); - ke.flags = 0; + if (put_user(ke.keycode, ip + 1)) + return -EFAULT; - error = input_get_keycode(dev, &ke); - if (error) - return error; + return 0; +} - if (put_user(ke.keycode, ip + 1)) - return -EFAULT; +static int evdev_handle_get_keycode_v2(struct input_dev *dev, void __user *p) +{ + struct input_keymap_entry ke; + int error; - } else { - size = min(size, sizeof(ke)); + if (copy_from_user(&ke, p, sizeof(ke))) + return -EFAULT; - if (copy_from_user(&ke, p, size)) - return -EFAULT; + error = input_get_keycode(dev, &ke); + if (error) + return error; - error = input_get_keycode(dev, &ke); - if (error) - return error; + if (copy_to_user(p, &ke, sizeof(ke))) + return -EFAULT; - if (copy_to_user(p, &ke, size)) - return -EFAULT; - } return 0; } -static int evdev_handle_set_keycode(struct input_dev *dev, - void __user *p, size_t size) +static int evdev_handle_set_keycode(struct input_dev *dev, void __user *p) { - struct input_keymap_entry ke; - - memset(&ke, 0, sizeof(ke)); + struct input_keymap_entry ke = { + .len = sizeof(unsigned int), + .flags = 0, + }; + int __user *ip = (int __user *)p; - if (size == sizeof(unsigned int[2])) { - /* legacy case */ - int __user *ip = (int __user *)p; + if (copy_from_user(ke.scancode, p, sizeof(unsigned int))) + return -EFAULT; - if (copy_from_user(ke.scancode, p, sizeof(unsigned int))) - return -EFAULT; + if (get_user(ke.keycode, ip + 1)) + return -EFAULT; - if (get_user(ke.keycode, ip + 1)) - return -EFAULT; + return input_set_keycode(dev, &ke); +} - ke.len = sizeof(unsigned int); - ke.flags = 0; +static int evdev_handle_set_keycode_v2(struct input_dev *dev, void __user *p) +{ + struct input_keymap_entry ke; - } else { - size = min(size, sizeof(ke)); + if (copy_from_user(&ke, p, sizeof(ke))) + return -EFAULT; - if (copy_from_user(&ke, p, size)) - return -EFAULT; - - if (ke.len > sizeof(ke.scancode)) - return -EINVAL; - } + if (ke.len > sizeof(ke.scancode)) + return -EINVAL; return input_set_keycode(dev, &ke); } @@ -669,6 +666,18 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, return evdev_grab(evdev, client); else return evdev_ungrab(evdev, client); + + case EVIOCGKEYCODE: + return evdev_handle_get_keycode(dev, p); + + case EVIOCSKEYCODE: + return evdev_handle_set_keycode(dev, p); + + case EVIOCGKEYCODE_V2: + return evdev_handle_get_keycode_v2(dev, p); + + case EVIOCSKEYCODE_V2: + return evdev_handle_set_keycode_v2(dev, p); } size = _IOC_SIZE(cmd); @@ -708,12 +717,6 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, return -EFAULT; return error; - - case EVIOC_MASK_SIZE(EVIOCGKEYCODE): - return evdev_handle_get_keycode(dev, p, size); - - case EVIOC_MASK_SIZE(EVIOCSKEYCODE): - return evdev_handle_set_keycode(dev, p, size); } /* Multi-number variable-length handlers */ diff --git a/include/linux/input.h b/include/linux/input.h index a8af21d42bc1..9777668883be 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -104,8 +104,10 @@ struct input_keymap_entry { #define EVIOCGREP _IOR('E', 0x03, unsigned int[2]) /* get repeat settings */ #define EVIOCSREP _IOW('E', 0x03, unsigned int[2]) /* set repeat settings */ -#define EVIOCGKEYCODE _IOR('E', 0x04, struct input_keymap_entry) /* get keycode */ -#define EVIOCSKEYCODE _IOW('E', 0x04, struct input_keymap_entry) /* set keycode */ +#define EVIOCGKEYCODE _IOR('E', 0x04, unsigned int[2]) /* get keycode */ +#define EVIOCGKEYCODE_V2 _IOR('E', 0x04, struct input_keymap_entry) +#define EVIOCSKEYCODE _IOW('E', 0x04, unsigned int[2]) /* set keycode */ +#define EVIOCSKEYCODE_V2 _IOW('E', 0x04, struct input_keymap_entry) #define EVIOCGNAME(len) _IOC(_IOC_READ, 'E', 0x06, len) /* get device name */ #define EVIOCGPHYS(len) _IOC(_IOC_READ, 'E', 0x07, len) /* get physical location */ -- cgit v1.2.3-70-g09d2 From 62731fa0c893515dc6cbc3e0a2879a92793c735f Mon Sep 17 00:00:00 2001 From: Alexey Zaytsev Date: Mon, 22 Nov 2010 00:33:03 +0000 Subject: fanotify: split version into version and metadata_len To implement per event type optional headers we are interested in knowing how long the metadata structure is. This patch slits the __u32 version field into a __u8 version and a __u16 metadata_len field (with __u8 left over). This should allow for backwards compat ABI. Signed-off-by: Alexey Zaytsev [rewrote descrtion and changed object sizes and ordering - eparis] Signed-off-by: Eric Paris --- include/linux/fanotify.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index c73224315aee..6c6133f76e16 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -83,11 +83,13 @@ FAN_ALL_PERM_EVENTS |\ FAN_Q_OVERFLOW) -#define FANOTIFY_METADATA_VERSION 2 +#define FANOTIFY_METADATA_VERSION 3 struct fanotify_event_metadata { __u32 event_len; - __u32 vers; + __u8 vers; + __u8 reserved; + __u16 metadata_len; __aligned_u64 mask; __s32 fd; __s32 pid; -- cgit v1.2.3-70-g09d2 From 667c78afaec0ac500908e191e8f236e9578d7b1f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 8 Dec 2010 12:39:12 -0800 Subject: xen: Provide a variant of __RING_SIZE() that is an integer constant expression Without this, gcc 4.5 won't compile xen-netfront and xen-blkfront, where this is being used to specify array sizes. Signed-off-by: Jan Beulich Signed-off-by: Jeremy Fitzhardinge Cc: Jens Axboe Cc: David Miller Cc: Stable Kernel Signed-off-by: Linus Torvalds --- drivers/block/xen-blkfront.c | 2 +- drivers/net/xen-netfront.c | 4 ++-- include/xen/interface/io/ring.h | 11 +++++++++-- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 4f9e22f29138..657873e4328d 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -72,7 +72,7 @@ struct blk_shadow { static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; -#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) +#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) /* * We have one of these per vbd, whether ide, scsi or 'other'. They diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 458bb57914a3..cdbeec9f83ea 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -66,8 +66,8 @@ struct netfront_cb { #define GRANT_INVALID_REF 0 -#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE) -#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE) +#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE) +#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE) #define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) struct netfront_info { diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h index e8cbf431c8cc..75271b9a8f61 100644 --- a/include/xen/interface/io/ring.h +++ b/include/xen/interface/io/ring.h @@ -24,8 +24,15 @@ typedef unsigned int RING_IDX; * A ring contains as many entries as will fit, rounded down to the nearest * power of two (so we can mask with (size-1) to loop around). */ -#define __RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) + +/* + * The same for passing in an actual pointer instead of a name tag. + */ +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) /* * Macros to make the correct C datatypes for a new kind of ring. -- cgit v1.2.3-70-g09d2 From f08f5a0add20834d3f3d876dfe08005a5df656db Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 16 Dec 2010 17:11:58 +0100 Subject: PM / Runtime: Fix pm_runtime_suspended() There are some situations (e.g. in __pm_generic_call()), where pm_runtime_suspended() is used to decide whether or not to execute a device's (system) ->suspend() callback. The callback is not executed if pm_runtime_suspended() returns true, but it does so for devices that don't even support runtime PM, because the power.disable_depth device field is ignored by it. This leads to problems (i.e. devices are not suspened when they should), so rework pm_runtime_suspended() so that it returns false if the device's power.disable_depth field is different from zero. Signed-off-by: Rafael J. Wysocki Cc: stable@kernel.org --- Documentation/power/runtime_pm.txt | 4 ++-- include/linux/pm_runtime.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 489e9bacd165..41cc7b30d7dd 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -379,8 +379,8 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: zero) bool pm_runtime_suspended(struct device *dev); - - return true if the device's runtime PM status is 'suspended', or false - otherwise + - return true if the device's runtime PM status is 'suspended' and its + 'power.disable_depth' field is equal to zero, or false otherwise void pm_runtime_allow(struct device *dev); - set the power.runtime_auto flag for the device and decrease its usage diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 3ec2358f8692..d19f1cca7f74 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -77,7 +77,8 @@ static inline void device_set_run_wake(struct device *dev, bool enable) static inline bool pm_runtime_suspended(struct device *dev) { - return dev->power.runtime_status == RPM_SUSPENDED; + return dev->power.runtime_status == RPM_SUSPENDED + && !dev->power.disable_depth; } static inline void pm_runtime_mark_last_busy(struct device *dev) -- cgit v1.2.3-70-g09d2 From 3f84622d7c7818077f5e6cf4b8a0d1b10dc65147 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 27 Nov 2010 19:26:32 +0100 Subject: SSB: Fix nvram_get on BCM47xx platform The nvram_get function was never in the mainline kernel, it only existed in an external OpenWrt patch. Use nvram_getenv function, which is in mainline and use an include instead of an extra function declaration. et0macaddr contains the mac address in text from like 00:11:22:33:44:55. We have to parse it before adding it into macaddr. nvram_parse_macaddr will be merged into asm/mach-bcm47xx/nvram.h through the MIPS git tree and will be available soon. It will not build now without nvram_parse_macaddr, but it hasn't before either. Signed-off-by: Hauke Mehrtens To: linux-mips@linux-mips.org Cc: mb@bu3sch.de Cc: netdev@vger.kernel.org Cc: Hauke Mehrtens Acked-by: Michael Buesch Patchwork: https://patchwork.linux-mips.org/patch/1849/ Signed-off-by: Ralf Baechle --- include/linux/ssb/ssb_driver_gige.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h index 942e38736901..eba52a100533 100644 --- a/include/linux/ssb/ssb_driver_gige.h +++ b/include/linux/ssb/ssb_driver_gige.h @@ -96,16 +96,21 @@ static inline bool ssb_gige_must_flush_posted_writes(struct pci_dev *pdev) return 0; } -extern char * nvram_get(const char *name); +#ifdef CONFIG_BCM47XX +#include /* Get the device MAC address */ static inline void ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr) { -#ifdef CONFIG_BCM47XX - char *res = nvram_get("et0macaddr"); - if (res) - memcpy(macaddr, res, 6); -#endif + char buf[20]; + if (nvram_getenv("et0macaddr", buf, sizeof(buf)) < 0) + return; + nvram_parse_macaddr(buf, macaddr); } +#else +static inline void ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr) +{ +} +#endif extern int ssb_gige_pcibios_plat_dev_init(struct ssb_device *sdev, struct pci_dev *pdev); -- cgit v1.2.3-70-g09d2 From e692cb668fdd5a712c6ed2a2d6f2a36ee83997b4 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 1 Dec 2010 19:41:49 +0100 Subject: block: Deprecate QUEUE_FLAG_CLUSTER and use queue_limits instead When stacking devices, a request_queue is not always available. This forced us to have a no_cluster flag in the queue_limits that could be used as a carrier until the request_queue had been set up for a metadevice. There were several problems with that approach. First of all it was up to the stacking device to remember to set queue flag after stacking had completed. Also, the queue flag and the queue limits had to be kept in sync at all times. We got that wrong, which could lead to us issuing commands that went beyond the max scatterlist limit set by the driver. The proper fix is to avoid having two flags for tracking the same thing. We deprecate QUEUE_FLAG_CLUSTER and use the queue limit directly in the block layer merging functions. The queue_limit 'no_cluster' is turned into 'cluster' to avoid double negatives and to ease stacking. Clustering defaults to being enabled as before. The queue flag logic is removed from the stacking function, and explicitly setting the cluster flag is no longer necessary in DM and MD. Reported-by: Ed Lin Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-merge.c | 6 +++--- block/blk-settings.c | 25 ++----------------------- block/blk-sysfs.c | 2 +- drivers/md/dm-table.c | 5 ----- drivers/md/md.c | 3 --- drivers/scsi/scsi_lib.c | 3 +-- include/linux/blkdev.h | 9 ++++++--- 7 files changed, 13 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/block/blk-merge.c b/block/blk-merge.c index 77b7c26df6b5..74bc4a768f32 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -21,7 +21,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, return 0; fbio = bio; - cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); + cluster = blk_queue_cluster(q); seg_size = 0; nr_phys_segs = 0; for_each_bio(bio) { @@ -87,7 +87,7 @@ EXPORT_SYMBOL(blk_recount_segments); static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, struct bio *nxt) { - if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) + if (!blk_queue_cluster(q)) return 0; if (bio->bi_seg_back_size + nxt->bi_seg_front_size > @@ -123,7 +123,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, int nsegs, cluster; nsegs = 0; - cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); + cluster = blk_queue_cluster(q); /* * for each bio in rq diff --git a/block/blk-settings.c b/block/blk-settings.c index 701859fb9647..e55f5fc4ca22 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -126,7 +126,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->alignment_offset = 0; lim->io_opt = 0; lim->misaligned = 0; - lim->no_cluster = 0; + lim->cluster = 1; } EXPORT_SYMBOL(blk_set_default_limits); @@ -464,15 +464,6 @@ EXPORT_SYMBOL(blk_queue_io_opt); void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) { blk_stack_limits(&t->limits, &b->limits, 0); - - if (!t->queue_lock) - WARN_ON_ONCE(1); - else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { - unsigned long flags; - spin_lock_irqsave(t->queue_lock, flags); - queue_flag_clear(QUEUE_FLAG_CLUSTER, t); - spin_unlock_irqrestore(t->queue_lock, flags); - } } EXPORT_SYMBOL(blk_queue_stack_limits); @@ -545,7 +536,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->io_min = max(t->io_min, b->io_min); t->io_opt = lcm(t->io_opt, b->io_opt); - t->no_cluster |= b->no_cluster; + t->cluster &= b->cluster; t->discard_zeroes_data &= b->discard_zeroes_data; /* Physical block size a multiple of the logical block size? */ @@ -641,7 +632,6 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset) { struct request_queue *t = disk->queue; - struct request_queue *b = bdev_get_queue(bdev); if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) { char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE]; @@ -652,17 +642,6 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n", top, bottom); } - - if (!t->queue_lock) - WARN_ON_ONCE(1); - else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { - unsigned long flags; - - spin_lock_irqsave(t->queue_lock, flags); - if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) - queue_flag_clear(QUEUE_FLAG_CLUSTER, t); - spin_unlock_irqrestore(t->queue_lock, flags); - } } EXPORT_SYMBOL(disk_stack_limits); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 013457f47fdc..41fb69150b4d 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -119,7 +119,7 @@ static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char * static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page) { - if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) + if (blk_queue_cluster(q)) return queue_var_show(queue_max_segment_size(q), (page)); return queue_var_show(PAGE_CACHE_SIZE, (page)); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 90267f8d64ee..e2da1912a2cb 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1131,11 +1131,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, */ q->limits = *limits; - if (limits->no_cluster) - queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); - else - queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q); - if (!dm_table_supports_discards(t)) queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); else diff --git a/drivers/md/md.c b/drivers/md/md.c index 84c46a161927..52694d29663d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4296,9 +4296,6 @@ static int md_alloc(dev_t dev, char *name) goto abort; mddev->queue->queuedata = mddev; - /* Can be unlocked because the queue is new: no concurrency */ - queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue); - blk_queue_make_request(mddev->queue, md_make_request); disk = alloc_disk(1 << shift); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index eafeeda6e194..9d7ba07dc5ef 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1642,9 +1642,8 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, blk_queue_max_segment_size(q, dma_get_max_seg_size(dev)); - /* New queue, no concurrency on queue_flags */ if (!shost->use_clustering) - queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); + q->limits.cluster = 0; /* * set a reasonable default alignment on word boundaries: the diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aae86fd10c4f..95aeeeb49e8b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -250,7 +250,7 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; - unsigned char no_cluster; + unsigned char cluster; signed char discard_zeroes_data; }; @@ -380,7 +380,6 @@ struct request_queue #endif }; -#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ @@ -403,7 +402,6 @@ struct request_queue #define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_CLUSTER) | \ (1 << QUEUE_FLAG_STACKABLE) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ (1 << QUEUE_FLAG_ADD_RANDOM)) @@ -510,6 +508,11 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define rq_data_dir(rq) ((rq)->cmd_flags & 1) +static inline unsigned int blk_queue_cluster(struct request_queue *q) +{ + return q->limits.cluster; +} + /* * We regard a request as sync, if either a read or a sync write */ -- cgit v1.2.3-70-g09d2 From 72d4cd9f38b5ed96b75df4c622be25e1c2648dd3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 17 Dec 2010 08:34:20 +0100 Subject: block: max hardware sectors limit wrapper Implement blk_limits_max_hw_sectors() and make blk_queue_max_hw_sectors() a wrapper around it. DM needs this to avoid setting queue_limits' max_hw_sectors and max_sectors directly. dm_set_device_limits() now leverages blk_limits_max_hw_sectors() logic to establish the appropriate max_hw_sectors minimum (PAGE_SIZE). Fixes issue where DM was incorrectly setting max_sectors rather than max_hw_sectors (which caused dm_merge_bvec()'s max_hw_sectors check to be ineffective). Signed-off-by: Mike Snitzer Cc: stable@kernel.org Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 26 ++++++++++++++++++++------ drivers/md/dm-table.c | 5 ++--- include/linux/blkdev.h | 1 + 3 files changed, 23 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index e55f5fc4ca22..36c8c1f2af18 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -229,8 +229,8 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask) EXPORT_SYMBOL(blk_queue_bounce_limit); /** - * blk_queue_max_hw_sectors - set max sectors for a request for this queue - * @q: the request queue for the device + * blk_limits_max_hw_sectors - set hard and soft limit of max sectors for request + * @limits: the queue limits * @max_hw_sectors: max hardware sectors in the usual 512b unit * * Description: @@ -244,7 +244,7 @@ EXPORT_SYMBOL(blk_queue_bounce_limit); * per-device basis in /sys/block//queue/max_sectors_kb. * The soft limit can not exceed max_hw_sectors. **/ -void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) +void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_sectors) { if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) { max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9); @@ -252,9 +252,23 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto __func__, max_hw_sectors); } - q->limits.max_hw_sectors = max_hw_sectors; - q->limits.max_sectors = min_t(unsigned int, max_hw_sectors, - BLK_DEF_MAX_SECTORS); + limits->max_hw_sectors = max_hw_sectors; + limits->max_sectors = min_t(unsigned int, max_hw_sectors, + BLK_DEF_MAX_SECTORS); +} +EXPORT_SYMBOL(blk_limits_max_hw_sectors); + +/** + * blk_queue_max_hw_sectors - set max sectors for a request for this queue + * @q: the request queue for the device + * @max_hw_sectors: max hardware sectors in the usual 512b unit + * + * Description: + * See description for blk_limits_max_hw_sectors(). + **/ +void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) +{ + blk_limits_max_hw_sectors(&q->limits, max_hw_sectors); } EXPORT_SYMBOL(blk_queue_max_hw_sectors); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e2da1912a2cb..4d705cea0f8c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -517,9 +517,8 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, */ if (q->merge_bvec_fn && !ti->type->merge) - limits->max_sectors = - min_not_zero(limits->max_sectors, - (unsigned int) (PAGE_SIZE >> 9)); + blk_limits_max_hw_sectors(limits, + (unsigned int) (PAGE_SIZE >> 9)); return 0; } EXPORT_SYMBOL_GPL(dm_set_device_limits); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95aeeeb49e8b..36ab42c9bb99 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -808,6 +808,7 @@ extern struct request_queue *blk_init_allocated_queue(struct request_queue *, extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); +extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); -- cgit v1.2.3-70-g09d2 From b6aa5901c7a2bd90d0b6b9866300d2648b2568f3 Mon Sep 17 00:00:00 2001 From: Henry C Chang Date: Wed, 15 Dec 2010 20:45:41 -0800 Subject: ceph: mark user pages dirty on direct-io reads For read operation, we have to set the argument _write_ of get_user_pages to 1 since we will write data to pages. Also, we need to SetPageDirty before releasing these pages. Signed-off-by: Henry C Chang Signed-off-by: Sage Weil --- fs/ceph/file.c | 8 ++++---- include/linux/ceph/libceph.h | 6 ++++-- net/ceph/pagevec.c | 11 +++++++---- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e860d8f1bb45..7d0e4a82d898 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -384,7 +384,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, if (file->f_flags & O_DIRECT) { num_pages = calc_pages_for((unsigned long)data, len); - pages = ceph_get_direct_page_vector(data, num_pages); + pages = ceph_get_direct_page_vector(data, num_pages, true); } else { num_pages = calc_pages_for(off, len); pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); @@ -413,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, done: if (file->f_flags & O_DIRECT) - ceph_put_page_vector(pages, num_pages); + ceph_put_page_vector(pages, num_pages, true); else ceph_release_page_vector(pages, num_pages); dout("sync_read result %d\n", ret); @@ -522,7 +522,7 @@ more: return -ENOMEM; if (file->f_flags & O_DIRECT) { - pages = ceph_get_direct_page_vector(data, num_pages); + pages = ceph_get_direct_page_vector(data, num_pages, false); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto out; @@ -572,7 +572,7 @@ more: } if (file->f_flags & O_DIRECT) - ceph_put_page_vector(pages, num_pages); + ceph_put_page_vector(pages, num_pages, false); else if (file->f_flags & O_SYNC) ceph_release_page_vector(pages, num_pages); diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 9e76d35670d2..72c72bfccb88 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -227,8 +227,10 @@ extern int ceph_open_session(struct ceph_client *client); extern void ceph_release_page_vector(struct page **pages, int num_pages); extern struct page **ceph_get_direct_page_vector(const char __user *data, - int num_pages); -extern void ceph_put_page_vector(struct page **pages, int num_pages); + int num_pages, + bool write_page); +extern void ceph_put_page_vector(struct page **pages, int num_pages, + bool dirty); extern void ceph_release_page_vector(struct page **pages, int num_pages); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_copy_user_to_page_vector(struct page **pages, diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index ac34feeb2b3a..01947a5d03b7 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -13,7 +13,7 @@ * build a vector of user pages */ struct page **ceph_get_direct_page_vector(const char __user *data, - int num_pages) + int num_pages, bool write_page) { struct page **pages; int rc; @@ -24,7 +24,7 @@ struct page **ceph_get_direct_page_vector(const char __user *data, down_read(¤t->mm->mmap_sem); rc = get_user_pages(current, current->mm, (unsigned long)data, - num_pages, 0, 0, pages, NULL); + num_pages, write_page, 0, pages, NULL); up_read(¤t->mm->mmap_sem); if (rc < 0) goto fail; @@ -36,12 +36,15 @@ fail: } EXPORT_SYMBOL(ceph_get_direct_page_vector); -void ceph_put_page_vector(struct page **pages, int num_pages) +void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty) { int i; - for (i = 0; i < num_pages; i++) + for (i = 0; i < num_pages; i++) { + if (dirty) + set_page_dirty_lock(pages[i]); put_page(pages[i]); + } kfree(pages); } EXPORT_SYMBOL(ceph_put_page_vector); -- cgit v1.2.3-70-g09d2 From c0f5ac5426f7fd82b23dd5c6a1e633b290294a08 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 16 Dec 2010 10:38:41 -0700 Subject: Revert "resources: support allocating space within a region from the top down" This reverts commit e7f8567db9a7f6b3151b0b275e245c1cef0d9c70. Acked-by: H. Peter Anvin Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- Documentation/kernel-parameters.txt | 5 -- include/linux/ioport.h | 1 - kernel/resource.c | 98 ++----------------------------------- 3 files changed, 4 insertions(+), 100 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index cdd2a6e8a3b7..8b61c9360999 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2175,11 +2175,6 @@ and is between 256 and 4096 characters. It is defined in the file reset_devices [KNL] Force drivers to reset the underlying device during initialization. - resource_alloc_from_bottom - Allocate new resources from the beginning of available - space, not the end. If you need to use this, please - report a bug. - resume= [SWSUSP] Specify the partition device for software suspend diff --git a/include/linux/ioport.h b/include/linux/ioport.h index d377ea815d45..b22790268b64 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -112,7 +112,6 @@ struct resource_list { /* PC/ISA/whatever - the normal PC address spaces: IO and memory */ extern struct resource ioport_resource; extern struct resource iomem_resource; -extern int resource_alloc_from_bottom; extern struct resource *request_resource_conflict(struct resource *root, struct resource *new); extern int request_resource(struct resource *root, struct resource *new); diff --git a/kernel/resource.c b/kernel/resource.c index 9fad33efd0db..560659f7baef 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -40,23 +40,6 @@ EXPORT_SYMBOL(iomem_resource); static DEFINE_RWLOCK(resource_lock); -/* - * By default, we allocate free space bottom-up. The architecture can request - * top-down by clearing this flag. The user can override the architecture's - * choice with the "resource_alloc_from_bottom" kernel boot option, but that - * should only be a debugging tool. - */ -int resource_alloc_from_bottom = 1; - -static __init int setup_alloc_from_bottom(char *s) -{ - printk(KERN_INFO - "resource: allocating from bottom-up; please report a bug\n"); - resource_alloc_from_bottom = 1; - return 0; -} -early_param("resource_alloc_from_bottom", setup_alloc_from_bottom); - static void *r_next(struct seq_file *m, void *v, loff_t *pos) { struct resource *p = v; @@ -396,75 +379,8 @@ static bool resource_contains(struct resource *res1, struct resource *res2) return res1->start <= res2->start && res1->end >= res2->end; } -/* - * Find the resource before "child" in the sibling list of "root" children. - */ -static struct resource *find_sibling_prev(struct resource *root, struct resource *child) -{ - struct resource *this; - - for (this = root->child; this; this = this->sibling) - if (this->sibling == child) - return this; - - return NULL; -} - -/* - * Find empty slot in the resource tree given range and alignment. - * This version allocates from the end of the root resource first. - */ -static int find_resource_from_top(struct resource *root, struct resource *new, - resource_size_t size, resource_size_t min, - resource_size_t max, resource_size_t align, - resource_size_t (*alignf)(void *, - const struct resource *, - resource_size_t, - resource_size_t), - void *alignf_data) -{ - struct resource *this; - struct resource tmp, avail, alloc; - - tmp.start = root->end; - tmp.end = root->end; - - this = find_sibling_prev(root, NULL); - for (;;) { - if (this) { - if (this->end < root->end) - tmp.start = this->end + 1; - } else - tmp.start = root->start; - - resource_clip(&tmp, min, max); - - /* Check for overflow after ALIGN() */ - avail = *new; - avail.start = ALIGN(tmp.start, align); - avail.end = tmp.end; - if (avail.start >= tmp.start) { - alloc.start = alignf(alignf_data, &avail, size, align); - alloc.end = alloc.start + size - 1; - if (resource_contains(&avail, &alloc)) { - new->start = alloc.start; - new->end = alloc.end; - return 0; - } - } - - if (!this || this->start == root->start) - break; - - tmp.end = this->start - 1; - this = find_sibling_prev(root, this); - } - return -EBUSY; -} - /* * Find empty slot in the resource tree given range and alignment. - * This version allocates from the beginning of the root resource first. */ static int find_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, @@ -480,15 +396,14 @@ static int find_resource(struct resource *root, struct resource *new, tmp.start = root->start; /* - * Skip past an allocated resource that starts at 0, since the - * assignment of this->start - 1 to tmp->end below would cause an - * underflow. + * Skip past an allocated resource that starts at 0, since the assignment + * of this->start - 1 to tmp->end below would cause an underflow. */ if (this && this->start == 0) { tmp.start = this->end + 1; this = this->sibling; } - for (;;) { + for(;;) { if (this) tmp.end = this->start - 1; else @@ -509,10 +424,8 @@ static int find_resource(struct resource *root, struct resource *new, return 0; } } - if (!this) break; - tmp.start = this->end + 1; this = this->sibling; } @@ -545,10 +458,7 @@ int allocate_resource(struct resource *root, struct resource *new, alignf = simple_align_resource; write_lock(&resource_lock); - if (resource_alloc_from_bottom) - err = find_resource(root, new, size, min, max, align, alignf, alignf_data); - else - err = find_resource_from_top(root, new, size, min, max, align, alignf, alignf_data); + err = find_resource(root, new, size, min, max, align, alignf, alignf_data); if (err >= 0 && __request_resource(root, new)) err = -EBUSY; write_unlock(&resource_lock); -- cgit v1.2.3-70-g09d2 From fcb119183c73bf0781009713f303e28b1fb13d3e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 16 Dec 2010 10:38:46 -0700 Subject: resources: add arch hook for preventing allocation in reserved areas This adds arch_remove_reservations(), which an arch can implement if it needs to protect part of the address space from allocation. Sometimes that can be done by just putting a region in the resource tree, but there are cases where that doesn't work well. For example, x86 BIOS E820 reservations are not related to devices, so they may overlap part of, all of, or more than a device resource, so they may not end up at the correct spot in the resource tree. Acked-by: H. Peter Anvin Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- include/linux/ioport.h | 1 + kernel/resource.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index b22790268b64..e9bb22cba764 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -123,6 +123,7 @@ extern void reserve_region_with_split(struct resource *root, extern struct resource *insert_resource_conflict(struct resource *parent, struct resource *new); extern int insert_resource(struct resource *parent, struct resource *new); extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new); +extern void arch_remove_reservations(struct resource *avail); extern int allocate_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, resource_size_t max, resource_size_t align, diff --git a/kernel/resource.c b/kernel/resource.c index 560659f7baef..798e2fae2a06 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -357,6 +357,10 @@ int __weak page_is_ram(unsigned long pfn) return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; } +void __weak arch_remove_reservations(struct resource *avail) +{ +} + static resource_size_t simple_align_resource(void *data, const struct resource *avail, resource_size_t size, @@ -394,6 +398,7 @@ static int find_resource(struct resource *root, struct resource *new, struct resource *this = root->child; struct resource tmp = *new, avail, alloc; + tmp.flags = new->flags; tmp.start = root->start; /* * Skip past an allocated resource that starts at 0, since the assignment @@ -410,6 +415,7 @@ static int find_resource(struct resource *root, struct resource *new, tmp.end = root->end; resource_clip(&tmp, min, max); + arch_remove_reservations(&tmp); /* Check for overflow after ALIGN() */ avail = *new; -- cgit v1.2.3-70-g09d2 From b8da46d3d55807037b58f14621a0949f18053bde Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 20 Dec 2010 00:29:32 -0500 Subject: clarify a usage constraint for cnt32_to_63() The cnt32_to_63 algorithm relies on proper counter data evaluation ordering to work properly. This was missing from the provided documentation. Let's augment the documentation with the missing usage constraint and fix the only instance that got it wrong. Signed-off-by: Nicolas Pitre Acked-by: David Howells Signed-off-by: Linus Torvalds --- arch/mn10300/kernel/time.c | 10 +++------- include/linux/cnt32_to_63.h | 20 +++++++++++++++++++- 2 files changed, 22 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c index f860a340acc9..75da468090b9 100644 --- a/arch/mn10300/kernel/time.c +++ b/arch/mn10300/kernel/time.c @@ -40,21 +40,17 @@ unsigned long long sched_clock(void) unsigned long long ll; unsigned l[2]; } tsc64, result; - unsigned long tsc, tmp; + unsigned long tmp; unsigned product[3]; /* 96-bit intermediate value */ /* cnt32_to_63() is not safe with preemption */ preempt_disable(); - /* read the TSC value - */ - tsc = get_cycles(); - - /* expand to 64-bits. + /* expand the tsc to 64-bits. * - sched_clock() must be called once a minute or better or the * following will go horribly wrong - see cnt32_to_63() */ - tsc64.ll = cnt32_to_63(tsc) & 0x7fffffffffffffffULL; + tsc64.ll = cnt32_to_63(get_cycles()) & 0x7fffffffffffffffULL; preempt_enable(); diff --git a/include/linux/cnt32_to_63.h b/include/linux/cnt32_to_63.h index 7605fdd1eb65..e3d8bf26e5eb 100644 --- a/include/linux/cnt32_to_63.h +++ b/include/linux/cnt32_to_63.h @@ -61,13 +61,31 @@ union cnt32_to_63 { * * 2) this code must not be preempted for a duration longer than the * 32-bit counter half period minus the longest period between two - * calls to this code. + * calls to this code; * * Those requirements ensure proper update to the state bit in memory. * This is usually not a problem in practice, but if it is then a kernel * timer should be scheduled to manage for this code to be executed often * enough. * + * And finally: + * + * 3) the cnt_lo argument must be seen as a globally incrementing value, + * meaning that it should be a direct reference to the counter data which + * can be evaluated according to a specific ordering within the macro, + * and not the result of a previous evaluation stored in a variable. + * + * For example, this is wrong: + * + * u32 partial = get_hw_count(); + * u64 full = cnt32_to_63(partial); + * return full; + * + * This is fine: + * + * u64 full = cnt32_to_63(get_hw_count()); + * return full; + * * Note that the top bit (bit 63) in the returned value should be considered * as garbage. It is not cleared here because callers are likely to use a * multiplier on the returned value which can get rid of the top bit -- cgit v1.2.3-70-g09d2