diff options
Diffstat (limited to 'net')
62 files changed, 717 insertions, 352 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index 0a6110e15d0f..aaa37b07e30a 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -255,24 +255,42 @@ static struct kmem_cache *p9_req_cache; * p9_tag_alloc - Allocate a new request. * @c: Client session. * @type: Transaction type. - * @max_size: Maximum packet size for this request. + * @t_size: Buffer size for holding this request + * (automatic calculation by format template if 0). + * @r_size: Buffer size for holding server's reply on this request + * (automatic calculation by format template if 0). + * @fmt: Format template for assembling 9p request message + * (see p9pdu_vwritef). + * @ap: Variable arguments to be fed to passed format template + * (see p9pdu_vwritef). * * Context: Process context. * Return: Pointer to new request. */ static struct p9_req_t * -p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) +p9_tag_alloc(struct p9_client *c, int8_t type, uint t_size, uint r_size, + const char *fmt, va_list ap) { struct p9_req_t *req = kmem_cache_alloc(p9_req_cache, GFP_NOFS); - int alloc_msize = min(c->msize, max_size); + int alloc_tsize; + int alloc_rsize; int tag; + va_list apc; + + va_copy(apc, ap); + alloc_tsize = min_t(size_t, c->msize, + t_size ?: p9_msg_buf_size(c, type, fmt, apc)); + va_end(apc); + + alloc_rsize = min_t(size_t, c->msize, + r_size ?: p9_msg_buf_size(c, type + 1, fmt, ap)); if (!req) return ERR_PTR(-ENOMEM); - if (p9_fcall_init(c, &req->tc, alloc_msize)) + if (p9_fcall_init(c, &req->tc, alloc_tsize)) goto free_req; - if (p9_fcall_init(c, &req->rc, alloc_msize)) + if (p9_fcall_init(c, &req->rc, alloc_rsize)) goto free; p9pdu_reset(&req->tc); @@ -592,11 +610,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) } static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, - int8_t type, int req_size, + int8_t type, uint t_size, uint r_size, const char *fmt, va_list ap) { int err; struct p9_req_t *req; + va_list apc; p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type); @@ -608,7 +627,9 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, if (c->status == BeginDisconnect && type != P9_TCLUNK) return ERR_PTR(-EIO); - req = p9_tag_alloc(c, type, req_size); + va_copy(apc, ap); + req = p9_tag_alloc(c, type, t_size, r_size, fmt, apc); + va_end(apc); if (IS_ERR(req)) return req; @@ -643,9 +664,18 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) int sigpending, err; unsigned long flags; struct p9_req_t *req; + /* Passing zero for tsize/rsize to p9_client_prepare_req() tells it to + * auto determine an appropriate (small) request/response size + * according to actual message data being sent. Currently RDMA + * transport is excluded from this response message size optimization, + * as it would not cope with it, due to its pooled response buffers + * (using an optimized request size for RDMA as well though). + */ + const uint tsize = 0; + const uint rsize = c->trans_mod->pooled_rbuffers ? c->msize : 0; va_start(ap, fmt); - req = p9_client_prepare_req(c, type, c->msize, fmt, ap); + req = p9_client_prepare_req(c, type, tsize, rsize, fmt, ap); va_end(ap); if (IS_ERR(req)) return req; @@ -743,7 +773,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, /* We allocate a inline protocol data of only 4k bytes. * The actual content is passed in zero-copy fashion. */ - req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap); + req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, P9_ZC_HDR_SZ, fmt, ap); va_end(ap); if (IS_ERR(req)) return req; diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 83694c631989..4e3a2a1ffcb3 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -23,6 +23,173 @@ #include <trace/events/9p.h> +/* len[2] text[len] */ +#define P9_STRLEN(s) \ + (2 + min_t(size_t, s ? strlen(s) : 0, USHRT_MAX)) + +/** + * p9_msg_buf_size - Returns a buffer size sufficiently large to hold the + * intended 9p message. + * @c: client + * @type: message type + * @fmt: format template for assembling request message + * (see p9pdu_vwritef) + * @ap: variable arguments to be fed to passed format template + * (see p9pdu_vwritef) + * + * Note: Even for response types (P9_R*) the format template and variable + * arguments must always be for the originating request type (P9_T*). + */ +size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type, + const char *fmt, va_list ap) +{ + /* size[4] type[1] tag[2] */ + const int hdr = 4 + 1 + 2; + /* ename[s] errno[4] */ + const int rerror_size = hdr + P9_ERRMAX + 4; + /* ecode[4] */ + const int rlerror_size = hdr + 4; + const int err_size = + c->proto_version == p9_proto_2000L ? rlerror_size : rerror_size; + + static_assert(NAME_MAX <= 4*1024, "p9_msg_buf_size() currently assumes " + "a max. allowed directory entry name length of 4k"); + + switch (type) { + + /* message types not used at all */ + case P9_TERROR: + case P9_TLERROR: + case P9_TAUTH: + case P9_RAUTH: + BUG(); + + /* variable length & potentially large message types */ + case P9_TATTACH: + BUG_ON(strcmp("ddss?u", fmt)); + va_arg(ap, int32_t); + va_arg(ap, int32_t); + { + const char *uname = va_arg(ap, const char *); + const char *aname = va_arg(ap, const char *); + /* fid[4] afid[4] uname[s] aname[s] n_uname[4] */ + return hdr + 4 + 4 + P9_STRLEN(uname) + P9_STRLEN(aname) + 4; + } + case P9_TWALK: + BUG_ON(strcmp("ddT", fmt)); + va_arg(ap, int32_t); + va_arg(ap, int32_t); + { + uint i, nwname = va_arg(ap, int); + size_t wname_all; + const char **wnames = va_arg(ap, const char **); + for (i = 0, wname_all = 0; i < nwname; ++i) { + wname_all += P9_STRLEN(wnames[i]); + } + /* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */ + return hdr + 4 + 4 + 2 + wname_all; + } + case P9_RWALK: + BUG_ON(strcmp("ddT", fmt)); + va_arg(ap, int32_t); + va_arg(ap, int32_t); + { + uint nwname = va_arg(ap, int); + /* nwqid[2] nwqid*(wqid[13]) */ + return max_t(size_t, hdr + 2 + nwname * 13, err_size); + } + case P9_TCREATE: + BUG_ON(strcmp("dsdb?s", fmt)); + va_arg(ap, int32_t); + { + const char *name = va_arg(ap, const char *); + if (c->proto_version == p9_proto_legacy) { + /* fid[4] name[s] perm[4] mode[1] */ + return hdr + 4 + P9_STRLEN(name) + 4 + 1; + } else { + va_arg(ap, int32_t); + va_arg(ap, int); + { + const char *ext = va_arg(ap, const char *); + /* fid[4] name[s] perm[4] mode[1] extension[s] */ + return hdr + 4 + P9_STRLEN(name) + 4 + 1 + P9_STRLEN(ext); + } + } + } + case P9_TLCREATE: + BUG_ON(strcmp("dsddg", fmt)); + va_arg(ap, int32_t); + { + const char *name = va_arg(ap, const char *); + /* fid[4] name[s] flags[4] mode[4] gid[4] */ + return hdr + 4 + P9_STRLEN(name) + 4 + 4 + 4; + } + case P9_RREAD: + case P9_RREADDIR: + BUG_ON(strcmp("dqd", fmt)); + va_arg(ap, int32_t); + va_arg(ap, int64_t); + { + const int32_t count = va_arg(ap, int32_t); + /* count[4] data[count] */ + return max_t(size_t, hdr + 4 + count, err_size); + } + case P9_TWRITE: + BUG_ON(strcmp("dqV", fmt)); + va_arg(ap, int32_t); + va_arg(ap, int64_t); + { + const int32_t count = va_arg(ap, int32_t); + /* fid[4] offset[8] count[4] data[count] */ + return hdr + 4 + 8 + 4 + count; + } + case P9_TRENAMEAT: + BUG_ON(strcmp("dsds", fmt)); + va_arg(ap, int32_t); + { + const char *oldname, *newname; + oldname = va_arg(ap, const char *); + va_arg(ap, int32_t); + newname = va_arg(ap, const char *); + /* olddirfid[4] oldname[s] newdirfid[4] newname[s] */ + return hdr + 4 + P9_STRLEN(oldname) + 4 + P9_STRLEN(newname); + } + case P9_TSYMLINK: + BUG_ON(strcmp("dssg", fmt)); + va_arg(ap, int32_t); + { + const char *name = va_arg(ap, const char *); + const char *symtgt = va_arg(ap, const char *); + /* fid[4] name[s] symtgt[s] gid[4] */ + return hdr + 4 + P9_STRLEN(name) + P9_STRLEN(symtgt) + 4; + } + + case P9_RERROR: + return rerror_size; + case P9_RLERROR: + return rlerror_size; + + /* small message types */ + case P9_TWSTAT: + case P9_RSTAT: + case P9_RREADLINK: + case P9_TXATTRWALK: + case P9_TXATTRCREATE: + case P9_TLINK: + case P9_TMKDIR: + case P9_TMKNOD: + case P9_TRENAME: + case P9_TUNLINKAT: + case P9_TLOCK: + return 8 * 1024; + + /* tiny message types */ + default: + return 4 * 1024; + + } +} + static int p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); diff --git a/net/9p/protocol.h b/net/9p/protocol.h index 6d719c30331a..ad2283d1f96b 100644 --- a/net/9p/protocol.h +++ b/net/9p/protocol.h @@ -8,6 +8,8 @@ * Copyright (C) 2008 by IBM, Corp. */ +size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type, + const char *fmt, va_list ap); int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, va_list ap); int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index e758978b44be..56a186768750 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -91,6 +91,7 @@ struct p9_poll_wait { * @mux_list: list link for mux to manage multiple connections (?) * @client: reference to client instance for this connection * @err: error state + * @req_lock: lock protecting req_list and requests statuses * @req_list: accounting for requests which have been sent * @unsent_req_list: accounting for requests that haven't been sent * @rreq: read request @@ -114,6 +115,7 @@ struct p9_conn { struct list_head mux_list; struct p9_client *client; int err; + spinlock_t req_lock; struct list_head req_list; struct list_head unsent_req_list; struct p9_req_t *rreq; @@ -189,10 +191,10 @@ static void p9_conn_cancel(struct p9_conn *m, int err) p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); - spin_lock(&m->client->lock); + spin_lock(&m->req_lock); if (m->err) { - spin_unlock(&m->client->lock); + spin_unlock(&m->req_lock); return; } @@ -205,6 +207,8 @@ static void p9_conn_cancel(struct p9_conn *m, int err) list_move(&req->req_list, &cancel_list); } + spin_unlock(&m->req_lock); + list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); list_del(&req->req_list); @@ -212,7 +216,6 @@ static void p9_conn_cancel(struct p9_conn *m, int err) req->t_err = err; p9_client_cb(m->client, req, REQ_STATUS_ERROR); } - spin_unlock(&m->client->lock); } static __poll_t @@ -359,7 +362,7 @@ static void p9_read_work(struct work_struct *work) if ((m->rreq) && (m->rc.offset == m->rc.capacity)) { p9_debug(P9_DEBUG_TRANS, "got new packet\n"); m->rreq->rc.size = m->rc.offset; - spin_lock(&m->client->lock); + spin_lock(&m->req_lock); if (m->rreq->status == REQ_STATUS_SENT) { list_del(&m->rreq->req_list); p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD); @@ -368,14 +371,14 @@ static void p9_read_work(struct work_struct *work) p9_debug(P9_DEBUG_TRANS, "Ignore replies associated with a cancelled request\n"); } else { - spin_unlock(&m->client->lock); + spin_unlock(&m->req_lock); p9_debug(P9_DEBUG_ERROR, "Request tag %d errored out while we were reading the reply\n", m->rc.tag); err = -EIO; goto error; } - spin_unlock(&m->client->lock); + spin_unlock(&m->req_lock); m->rc.sdata = NULL; m->rc.offset = 0; m->rc.capacity = 0; @@ -453,10 +456,10 @@ static void p9_write_work(struct work_struct *work) } if (!m->wsize) { - spin_lock(&m->client->lock); + spin_lock(&m->req_lock); if (list_empty(&m->unsent_req_list)) { clear_bit(Wworksched, &m->wsched); - spin_unlock(&m->client->lock); + spin_unlock(&m->req_lock); return; } @@ -471,7 +474,7 @@ static void p9_write_work(struct work_struct *work) m->wpos = 0; p9_req_get(req); m->wreq = req; - spin_unlock(&m->client->lock); + spin_unlock(&m->req_lock); } p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", @@ -588,6 +591,7 @@ static void p9_conn_create(struct p9_client *client) INIT_LIST_HEAD(&m->mux_list); m->client = client; + spin_lock_init(&m->req_lock); INIT_LIST_HEAD(&m->req_list); INIT_LIST_HEAD(&m->unsent_req_list); INIT_WORK(&m->rq, p9_read_work); @@ -669,10 +673,10 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) if (m->err < 0) return m->err; - spin_lock(&client->lock); + spin_lock(&m->req_lock); req->status = REQ_STATUS_UNSENT; list_add_tail(&req->req_list, &m->unsent_req_list); - spin_unlock(&client->lock); + spin_unlock(&m->req_lock); if (test_and_clear_bit(Wpending, &m->wsched)) n = EPOLLOUT; @@ -687,11 +691,13 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) { + struct p9_trans_fd *ts = client->trans; + struct p9_conn *m = &ts->conn; int ret = 1; p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); - spin_lock(&client->lock); + spin_lock(&m->req_lock); if (req->status == REQ_STATUS_UNSENT) { list_del(&req->req_list); @@ -699,21 +705,24 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) p9_req_put(client, req); ret = 0; } - spin_unlock(&client->lock); + spin_unlock(&m->req_lock); return ret; } static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) { + struct p9_trans_fd *ts = client->trans; + struct p9_conn *m = &ts->conn; + p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); - spin_lock(&client->lock); + spin_lock(&m->req_lock); /* Ignore cancelled request if message has been received * before lock. */ if (req->status == REQ_STATUS_RCVD) { - spin_unlock(&client->lock); + spin_unlock(&m->req_lock); return 0; } @@ -722,7 +731,8 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) */ list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; - spin_unlock(&client->lock); + spin_unlock(&m->req_lock); + p9_req_put(client, req); return 0; @@ -821,11 +831,14 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd) goto out_free_ts; if (!(ts->rd->f_mode & FMODE_READ)) goto out_put_rd; + /* prevent workers from hanging on IO when fd is a pipe */ + ts->rd->f_flags |= O_NONBLOCK; ts->wr = fget(wfd); if (!ts->wr) goto out_put_rd; if (!(ts->wr->f_mode & FMODE_WRITE)) goto out_put_wr; + ts->wr->f_flags |= O_NONBLOCK; client->trans = ts; client->status = Connected; @@ -1061,7 +1074,9 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args) int err; struct p9_fd_opts opts; - parse_opts(args, &opts); + err = parse_opts(args, &opts); + if (err < 0) + return err; client->trans_opts.fd.rfd = opts.rfd; client->trans_opts.fd.wfd = opts.wfd; @@ -1082,6 +1097,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args) static struct p9_trans_module p9_tcp_trans = { .name = "tcp", .maxsize = MAX_SOCK_BUF, + .pooled_rbuffers = false, .def = 0, .create = p9_fd_create_tcp, .close = p9_fd_close, diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index d817d3745238..6ff706760676 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -739,6 +739,7 @@ error: static struct p9_trans_module p9_rdma_trans = { .name = "rdma", .maxsize = P9_RDMA_MAXSIZE, + .pooled_rbuffers = true, .def = 0, .owner = THIS_MODULE, .create = rdma_create_trans, diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index b84d35cf6899..e757f0601304 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -802,6 +802,7 @@ static struct p9_trans_module p9_virtio_trans = { * page in zero copy. */ .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), + .pooled_rbuffers = false, .def = 1, .owner = THIS_MODULE, }; diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 227f89cc7237..b15c64128c3e 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -246,6 +246,7 @@ static irqreturn_t xen_9pfs_front_event_handler(int irq, void *r) static struct p9_trans_module p9_xen_trans = { .name = "xen", .maxsize = 1 << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT - 2), + .pooled_rbuffers = false, .def = 1, .create = p9_xen_create, .close = p9_xen_close, @@ -510,7 +511,7 @@ static struct xenbus_driver xen_9pfs_front_driver = { .otherend_changed = xen_9pfs_front_changed, }; -static int p9_trans_xen_init(void) +static int __init p9_trans_xen_init(void) { int rc; @@ -529,7 +530,7 @@ static int p9_trans_xen_init(void) module_init(p9_trans_xen_init); MODULE_ALIAS_9P("xen"); -static void p9_trans_xen_exit(void) +static void __exit p9_trans_xen_exit(void) { v9fs_unregister_trans(&p9_xen_trans); return xenbus_unregister_driver(&xen_9pfs_front_driver); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index ebd78fdbd6e8..8009e0e93216 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -35,7 +35,6 @@ #include <net/bluetooth/hci_core.h> #include <net/bluetooth/rfcomm.h> -#define RFCOMM_TTY_MAGIC 0x6d02 /* magic number for rfcomm struct */ #define RFCOMM_TTY_PORTS RFCOMM_MAX_DEV /* whole lotta rfcomm devices */ #define RFCOMM_TTY_MAJOR 216 /* device node major id of the usb/bluetooth.c driver */ #define RFCOMM_TTY_MINOR 0 @@ -855,7 +854,8 @@ static int rfcomm_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned l return -ENOIOCTLCMD; } -static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old) +static void rfcomm_tty_set_termios(struct tty_struct *tty, + const struct ktermios *old) { struct ktermios *new = &tty->termios; int old_baud_rate = tty_termios_baud_rate(old); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index d3bb656308b4..dfa237fbd5a3 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -728,7 +728,6 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor, it->iter.bi_size = cursor->resid; BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter)); - cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter); } static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor, @@ -754,10 +753,8 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, cursor->resid -= bytes; bio_advance_iter(it->bio, &it->iter, bytes); - if (!cursor->resid) { - BUG_ON(!cursor->last_piece); + if (!cursor->resid) return false; /* no more data */ - } if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done && page == bio_iter_page(it->bio, it->iter))) @@ -770,9 +767,7 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, it->iter.bi_size = cursor->resid; } - BUG_ON(cursor->last_piece); BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter)); - cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter); return true; } #endif /* CONFIG_BLOCK */ @@ -788,8 +783,6 @@ static void ceph_msg_data_bvecs_cursor_init(struct ceph_msg_data_cursor *cursor, cursor->bvec_iter.bi_size = cursor->resid; BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter)); - cursor->last_piece = - cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter); } static struct page *ceph_msg_data_bvecs_next(struct ceph_msg_data_cursor *cursor, @@ -815,19 +808,14 @@ static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor, cursor->resid -= bytes; bvec_iter_advance(bvecs, &cursor->bvec_iter, bytes); - if (!cursor->resid) { - BUG_ON(!cursor->last_piece); + if (!cursor->resid) return false; /* no more data */ - } if (!bytes || (cursor->bvec_iter.bi_bvec_done && page == bvec_iter_page(bvecs, cursor->bvec_iter))) return false; /* more bytes to process in this segment */ - BUG_ON(cursor->last_piece); BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter)); - cursor->last_piece = - cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter); return true; } @@ -853,7 +841,6 @@ static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor, BUG_ON(page_count > (int)USHRT_MAX); cursor->page_count = (unsigned short)page_count; BUG_ON(length > SIZE_MAX - cursor->page_offset); - cursor->last_piece = cursor->page_offset + cursor->resid <= PAGE_SIZE; } static struct page * @@ -868,11 +855,7 @@ ceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor, BUG_ON(cursor->page_offset >= PAGE_SIZE); *page_offset = cursor->page_offset; - if (cursor->last_piece) - *length = cursor->resid; - else - *length = PAGE_SIZE - *page_offset; - + *length = min_t(size_t, cursor->resid, PAGE_SIZE - *page_offset); return data->pages[cursor->page_index]; } @@ -897,8 +880,6 @@ static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor, BUG_ON(cursor->page_index >= cursor->page_count); cursor->page_index++; - cursor->last_piece = cursor->resid <= PAGE_SIZE; - return true; } @@ -928,7 +909,6 @@ ceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor, cursor->resid = min(length, pagelist->length); cursor->page = page; cursor->offset = 0; - cursor->last_piece = cursor->resid <= PAGE_SIZE; } static struct page * @@ -948,11 +928,7 @@ ceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor, /* offset of first page in pagelist is always 0 */ *page_offset = cursor->offset & ~PAGE_MASK; - if (cursor->last_piece) - *length = cursor->resid; - else - *length = PAGE_SIZE - *page_offset; - + *length = min_t(size_t, cursor->resid, PAGE_SIZE - *page_offset); return cursor->page; } @@ -985,8 +961,6 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor, BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head)); cursor->page = list_next_entry(cursor->page, lru); - cursor->last_piece = cursor->resid <= PAGE_SIZE; - return true; } @@ -1044,8 +1018,7 @@ void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor, * Indicate whether this is the last piece in this data item. */ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, - size_t *page_offset, size_t *length, - bool *last_piece) + size_t *page_offset, size_t *length) { struct page *page; @@ -1074,8 +1047,6 @@ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, BUG_ON(*page_offset + *length > PAGE_SIZE); BUG_ON(!*length); BUG_ON(*length > cursor->resid); - if (last_piece) - *last_piece = cursor->last_piece; return page; } @@ -1112,7 +1083,6 @@ void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes) cursor->total_resid -= bytes; if (!cursor->resid && cursor->total_resid) { - WARN_ON(!cursor->last_piece); cursor->data++; __ceph_msg_data_cursor_init(cursor); new_piece = true; diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index 6b014eca3a13..3ddbde87e4d6 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -495,7 +495,7 @@ static int write_partial_message_data(struct ceph_connection *con) continue; } - page = ceph_msg_data_next(cursor, &page_offset, &length, NULL); + page = ceph_msg_data_next(cursor, &page_offset, &length); if (length == cursor->total_resid) more = MSG_MORE; ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, @@ -1008,7 +1008,7 @@ static int read_partial_msg_data(struct ceph_connection *con) continue; } - page = ceph_msg_data_next(cursor, &page_offset, &length, NULL); + page = ceph_msg_data_next(cursor, &page_offset, &length); ret = ceph_tcp_recvpage(con->sock, page, page_offset, length); if (ret <= 0) { if (do_datacrc) @@ -1050,7 +1050,7 @@ static int read_partial_msg_data_bounce(struct ceph_connection *con) continue; } - page = ceph_msg_data_next(cursor, &off, &len, NULL); + page = ceph_msg_data_next(cursor, &off, &len); ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len); if (ret <= 0) { con->in_data_crc = crc; diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index c6e5bfc717d5..cc8ff81a50b7 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -862,7 +862,7 @@ static void get_bvec_at(struct ceph_msg_data_cursor *cursor, ceph_msg_data_advance(cursor, 0); /* get a piece of data, cursor isn't advanced */ - page = ceph_msg_data_next(cursor, &off, &len, NULL); + page = ceph_msg_data_next(cursor, &off, &len); bv->bv_page = page; bv->bv_offset = off; diff --git a/net/core/sock.c b/net/core/sock.c index eeb6cbac6f49..a3ba0358c77c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3610,7 +3610,8 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; - return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_common_getsockopt); @@ -3636,7 +3637,8 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; - return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + return READ_ONCE(sk->sk_prot)->setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_common_setsockopt); diff --git a/net/dsa/port.c b/net/dsa/port.c index e4a0513816bb..208168276995 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1681,7 +1681,7 @@ int dsa_port_phylink_create(struct dsa_port *dp) pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn), mode, &dsa_port_phylink_mac_ops); if (IS_ERR(pl)) { - pr_err("error creating PHYLINK: %ld\n", PTR_ERR(dp->pl)); + pr_err("error creating PHYLINK: %ld\n", PTR_ERR(pl)); return PTR_ERR(pl); } diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index cbd0e2ac4ffe..6e55fae4c686 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -251,9 +251,6 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) return -EOPNOTSUPP; } - if (!size) - return -EINVAL; - lock_sock(sk); if (!sk->sk_bound_dev_if) dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); @@ -275,6 +272,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) err = -EMSGSIZE; goto out_dev; } + if (!size) { + err = 0; + goto out_dev; + } hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e2c219382345..3dd02396517d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -558,22 +558,27 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; + const struct proto *prot; int err; if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; + + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + if (uaddr->sa_family == AF_UNSPEC) - return sk->sk_prot->disconnect(sk, flags); + return prot->disconnect(sk, flags); if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { - err = sk->sk_prot->pre_connect(sk, uaddr, addr_len); + err = prot->pre_connect(sk, uaddr, addr_len); if (err) return err; } if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) return -EAGAIN; - return sk->sk_prot->connect(sk, uaddr, addr_len); + return prot->connect(sk, uaddr, addr_len); } EXPORT_SYMBOL(inet_dgram_connect); @@ -734,10 +739,11 @@ EXPORT_SYMBOL(inet_stream_connect); int inet_accept(struct socket *sock, struct socket *newsock, int flags, bool kern) { - struct sock *sk1 = sock->sk; + struct sock *sk1 = sock->sk, *sk2; int err = -EINVAL; - struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern); + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern); if (!sk2) goto do_err; @@ -825,12 +831,15 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { struct sock *sk = sock->sk; + const struct proto *prot; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; - if (sk->sk_prot->sendpage) - return sk->sk_prot->sendpage(sk, page, offset, size, flags); + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + if (prot->sendpage) + return prot->sendpage(sk, page, offset, size, flags); return sock_no_sendpage(sock, page, offset, size, flags); } EXPORT_SYMBOL(inet_sendpage); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2dc97583d279..e9a7f70a54df 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -888,13 +888,13 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, return 1; } + /* cannot match on nexthop object attributes */ + if (fi->nh) + return 1; + if (cfg->fc_oif || cfg->fc_gw_family) { struct fib_nh *nh; - /* cannot match on nexthop object attributes */ - if (fi->nh) - return 1; - nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { if (fib_encap_match(net, cfg->fc_encap_type, diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 71d3bb0abf6c..66fc940f9521 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -268,8 +268,21 @@ restart_rcu: rcu_read_lock(); restart: sk_nulls_for_each_rcu(sk, node, &head->chain) { - if (sk->sk_state != TCP_TIME_WAIT) + if (sk->sk_state != TCP_TIME_WAIT) { + /* A kernel listener socket might not hold refcnt for net, + * so reqsk_timer_handler() could be fired after net is + * freed. Userspace listener and reqsk never exist here. + */ + if (unlikely(sk->sk_state == TCP_NEW_SYN_RECV && + hashinfo->pernet)) { + struct request_sock *req = inet_reqsk(sk); + + inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req); + } + continue; + } + tw = inet_twsk(sk); if ((tw->tw_family != family) || refcount_read(&twsk_net(tw)->ns.count)) diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 8183bbcabb4a..ff85db52b2e5 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -77,7 +77,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; flow.flowi4_tos = iph->tos & IPTOS_RT_MASK; flow.flowi4_scope = RT_SCOPE_UNIVERSE; - flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par)); + flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par)); return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert; } diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 7ade04ff972d..e886147eed11 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -84,7 +84,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, oif = NULL; if (priv->flags & NFTA_FIB_F_IIF) - fl4.flowi4_oif = l3mdev_master_ifindex_rcu(oif); + fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(oif); if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 517042caf6dc..bde333b24837 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -617,21 +617,9 @@ int ping_getfrag(void *from, char *to, { struct pingfakehdr *pfh = from; - if (offset == 0) { - fraglen -= sizeof(struct icmphdr); - if (fraglen < 0) - BUG(); - if (!csum_and_copy_from_iter_full(to + sizeof(struct icmphdr), - fraglen, &pfh->wcheck, - &pfh->msg->msg_iter)) - return -EFAULT; - } else if (offset < sizeof(struct icmphdr)) { - BUG(); - } else { - if (!csum_and_copy_from_iter_full(to, fraglen, &pfh->wcheck, - &pfh->msg->msg_iter)) - return -EFAULT; - } + if (!csum_and_copy_from_iter_full(to, fraglen, &pfh->wcheck, + &pfh->msg->msg_iter)) + return -EFAULT; #if IS_ENABLED(CONFIG_IPV6) /* For IPv6, checksum each skb as we go along, as expected by @@ -639,7 +627,7 @@ int ping_getfrag(void *from, char *to, * wcheck, it will be finalized in ping_v4_push_pending_frames. */ if (pfh->family == AF_INET6) { - skb->csum = pfh->wcheck; + skb->csum = csum_block_add(skb->csum, pfh->wcheck, odd); skb->ip_summed = CHECKSUM_NONE; pfh->wcheck = 0; } @@ -842,7 +830,8 @@ back_from_confirm: pfh.family = AF_INET; err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len, - 0, &ipc, &rt, msg->msg_flags); + sizeof(struct icmphdr), &ipc, &rt, + msg->msg_flags); if (err) ip_flush_pending_frames(sk); else diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0c51abeee172..f8232811a5be 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3796,8 +3796,9 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, const struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) - return icsk->icsk_af_ops->setsockopt(sk, level, optname, - optval, optlen); + /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */ + return READ_ONCE(icsk->icsk_af_ops)->setsockopt(sk, level, optname, + optval, optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(tcp_setsockopt); @@ -4396,8 +4397,9 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) - return icsk->icsk_af_ops->getsockopt(sk, level, optname, - optval, optlen); + /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */ + return READ_ONCE(icsk->icsk_af_ops)->getsockopt(sk, level, optname, + optval, optlen); return do_tcp_getsockopt(sk, level, optname, USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); } diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index ddc7ba0554bd..112f28f93693 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -375,6 +375,7 @@ static void tcp_cdg_init(struct sock *sk) struct cdg *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); + ca->gradients = NULL; /* We silently fall back to window = 1 if allocation fails. */ if (window > 1) ca->gradients = kcalloc(window, sizeof(ca->gradients[0]), @@ -388,6 +389,7 @@ static void tcp_cdg_release(struct sock *sk) struct cdg *ca = inet_csk_ca(sk); kfree(ca->gradients); + ca->gradients = NULL; } static struct tcp_congestion_ops tcp_cdg __read_mostly = { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 79f30f026d89..c375f603a16c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -353,13 +353,14 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family) struct net *net; list_for_each_entry(net, net_exit_list, exit_list) { - /* The last refcount is decremented in tcp_sk_exit_batch() */ - if (refcount_read(&net->ipv4.tcp_death_row.tw_refcount) == 1) - continue; - if (net->ipv4.tcp_death_row.hashinfo->pernet) { + /* Even if tw_refcount == 1, we must clean up kernel reqsk */ inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family); } else if (!purged_once) { + /* The last refcount is decremented in tcp_sk_exit_batch() */ + if (refcount_read(&net->ipv4.tcp_death_row.tw_refcount) == 1) + continue; + inet_twsk_purge(&tcp_hashinfo, family); purged_once = true; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d63118ce5900..8126f67d18b3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1598,7 +1598,7 @@ drop: } EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); -void udp_destruct_sock(struct sock *sk) +void udp_destruct_common(struct sock *sk) { /* reclaim completely the forward allocated memory */ struct udp_sock *up = udp_sk(sk); @@ -1611,10 +1611,14 @@ void udp_destruct_sock(struct sock *sk) kfree_skb(skb); } udp_rmem_release(sk, total, 0, true); +} +EXPORT_SYMBOL_GPL(udp_destruct_common); +static void udp_destruct_sock(struct sock *sk) +{ + udp_destruct_common(sk); inet_sock_destruct(sk); } -EXPORT_SYMBOL_GPL(udp_destruct_sock); int udp_init_sock(struct sock *sk) { @@ -1622,7 +1626,6 @@ int udp_init_sock(struct sock *sk) sk->sk_destruct = udp_destruct_sock; return 0; } -EXPORT_SYMBOL_GPL(udp_init_sock); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) { diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 6e08a76ae1e7..e0c9cc39b81e 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -17,6 +17,14 @@ struct udp_table udplite_table __read_mostly; EXPORT_SYMBOL(udplite_table); +/* Designate sk as UDP-Lite socket */ +static int udplite_sk_init(struct sock *sk) +{ + udp_init_sock(sk); + udp_sk(sk)->pcflag = UDPLITE_BIT; + return 0; +} + static int udplite_rcv(struct sk_buff *skb) { return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d40b7d60e00e..024191004982 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -109,6 +109,12 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } +void inet6_sock_destruct(struct sock *sk) +{ + inet6_cleanup_sock(sk); + inet_sock_destruct(sk); +} + static int inet6_create(struct net *net, struct socket *sock, int protocol, int kern) { @@ -201,7 +207,7 @@ lookup_protocol: inet->hdrincl = 1; } - sk->sk_destruct = inet_sock_destruct; + sk->sk_destruct = inet6_sock_destruct; sk->sk_family = PF_INET6; sk->sk_protocol = protocol; @@ -510,6 +516,12 @@ void inet6_destroy_sock(struct sock *sk) } EXPORT_SYMBOL_GPL(inet6_destroy_sock); +void inet6_cleanup_sock(struct sock *sk) +{ + inet6_destroy_sock(sk); +} +EXPORT_SYMBOL_GPL(inet6_cleanup_sock); + /* * This does both peername and sockname. */ diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 2d2f4dd9e5df..532f4478c884 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -419,15 +419,18 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, rtnl_lock(); sockopt_lock_sock(sk); + /* Another thread has converted the socket into IPv4 with + * IPV6_ADDRFORM concurrently. + */ + if (unlikely(sk->sk_family != AF_INET6)) + goto unlock; + switch (optname) { case IPV6_ADDRFORM: if (optlen < sizeof(int)) goto e_inval; if (val == PF_INET) { - struct ipv6_txoptions *opt; - struct sk_buff *pktopt; - if (sk->sk_type == SOCK_RAW) break; @@ -458,7 +461,6 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; } - fl6_free_socklist(sk); __ipv6_sock_mc_close(sk); __ipv6_sock_ac_close(sk); @@ -475,9 +477,10 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, &tcp_prot, 1); - /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */ + /* Paired with READ_ONCE(sk->sk_prot) in inet6_stream_ops */ WRITE_ONCE(sk->sk_prot, &tcp_prot); - icsk->icsk_af_ops = &ipv4_specific; + /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ + WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific); sk->sk_socket->ops = &inet_stream_ops; sk->sk_family = PF_INET; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); @@ -490,19 +493,19 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, prot, 1); - /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */ + /* Paired with READ_ONCE(sk->sk_prot) in inet6_dgram_ops */ WRITE_ONCE(sk->sk_prot, prot); sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } - opt = xchg((__force struct ipv6_txoptions **)&np->opt, - NULL); - if (opt) { - atomic_sub(opt->tot_len, &sk->sk_omem_alloc); - txopt_put(opt); - } - pktopt = xchg(&np->pktoptions, NULL); - kfree_skb(pktopt); + + /* Disable all options not to allocate memory anymore, + * but there is still a race. See the lockless path + * in udpv6_sendmsg() and ipv6_local_rxpmtu(). + */ + np->rxopt.all = 0; + + inet6_cleanup_sock(sk); /* * ... and add it to the refcnt debug socks count @@ -994,6 +997,7 @@ done: break; } +unlock: sockopt_release_sock(sk); if (needs_rtnl) rtnl_unlock(); diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index d800801a5dd2..69d86b040a6a 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -37,6 +37,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, bool ret = false; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev), .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, .flowi6_proto = iph->nexthdr, .daddr = iph->saddr, @@ -55,9 +56,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, if (rpfilter_addr_linklocal(&iph->saddr)) { lookup_flags |= RT6_LOOKUP_F_IFACE; fl6.flowi6_oif = dev->ifindex; - /* Set flowi6_oif for vrf devices to lookup route in l3mdev domain. */ - } else if (netif_is_l3_master(dev) || netif_is_l3_slave(dev) || - (flags & XT_RPFILTER_LOOSE) == 0) + } else if ((flags & XT_RPFILTER_LOOSE) == 0) fl6.flowi6_oif = dev->ifindex; rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags); @@ -72,9 +71,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, goto out; } - if (rt->rt6i_idev->dev == dev || - l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex || - (flags & XT_RPFILTER_LOOSE)) + if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE)) ret = true; out: ip6_rt_put(rt); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 1d7e520d9966..91faac610e03 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -41,9 +41,8 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) { lookup_flags |= RT6_LOOKUP_F_IFACE; fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev); - } else if ((priv->flags & NFTA_FIB_F_IIF) && - (netif_is_l3_master(dev) || netif_is_l3_slave(dev))) { - fl6->flowi6_oif = dev->ifindex; + } else if (priv->flags & NFTA_FIB_F_IIF) { + fl6->flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev); } if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 5f2ef8493714..86c26e48d065 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -179,7 +179,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) lock_sock(sk); err = ip6_append_data(sk, ping_getfrag, &pfh, len, - 0, &ipc6, &fl6, rt, + sizeof(struct icmp6hdr), &ipc6, &fl6, rt, MSG_DONTWAIT); if (err) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a8adda623da1..2a3f9296df1e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -238,7 +238,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - icsk->icsk_af_ops = &ipv6_mapped; + /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ + WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped); if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, true); sk->sk_backlog_rcv = tcp_v4_do_rcv; @@ -250,7 +251,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (err) { icsk->icsk_ext_hdr_len = exthdrlen; - icsk->icsk_af_ops = &ipv6_specific; + /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ + WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific); if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, false); sk->sk_backlog_rcv = tcp_v6_do_rcv; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 91e795bb9ade..8d09f0ea5b8c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -56,6 +56,19 @@ #include <trace/events/skb.h> #include "udp_impl.h" +static void udpv6_destruct_sock(struct sock *sk) +{ + udp_destruct_common(sk); + inet6_sock_destruct(sk); +} + +int udpv6_init_sock(struct sock *sk) +{ + skb_queue_head_init(&udp_sk(sk)->reader_queue); + sk->sk_destruct = udpv6_destruct_sock; + return 0; +} + static u32 udp6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, @@ -1733,7 +1746,7 @@ struct proto udpv6_prot = { .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, - .init = udp_init_sock, + .init = udpv6_init_sock, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 4251e49d32a0..0590f566379d 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -12,6 +12,7 @@ int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int); int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, __be32, struct udp_table *); +int udpv6_init_sock(struct sock *sk); int udp_v6_get_port(struct sock *sk, unsigned short snum); void udp_v6_rehash(struct sock *sk); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index b70725856259..67eaf3ca14ce 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -12,6 +12,13 @@ #include <linux/proc_fs.h> #include "udp_impl.h" +static int udplitev6_sk_init(struct sock *sk) +{ + udpv6_init_sock(sk); + udp_sk(sk)->pcflag = UDPLITE_BIT; + return 0; +} + static int udplitev6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); @@ -38,7 +45,7 @@ struct proto udplitev6_prot = { .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, - .init = udplite_sk_init, + .init = udplitev6_sk_init, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 1215c863e1c4..27725464ec08 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1838,10 +1838,10 @@ static int kcm_release(struct socket *sock) kcm = kcm_sk(sk); mux = kcm->mux; + lock_sock(sk); sock_orphan(sk); kfree_skb(kcm->seq_skb); - lock_sock(sk); /* Purge queue under lock to avoid race condition with tx_work trying * to act when queue is nonempty. If tx_work runs after this point * it will just return. diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4e1d4c339f2d..a842f2e1c230 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1709,6 +1709,14 @@ struct ieee802_11_elems { /* whether a parse error occurred while retrieving these elements */ bool parse_error; + + /* + * scratch buffer that can be used for various element parsing related + * tasks, e.g., element de-fragmentation etc. + */ + size_t scratch_len; + u8 *scratch_pos; + u8 scratch[]; }; static inline struct ieee80211_local *hw_to_local( diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 572254366a0f..dd9ac1f7d2ea 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -243,7 +243,7 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata */ break; default: - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; } unlock: @@ -461,7 +461,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do /* * Stop TX on this interface first. */ - if (sdata->dev) + if (!local->ops->wake_tx_queue && sdata->dev) netif_tx_stop_all_queues(sdata->dev); ieee80211_roc_purge(local, sdata); @@ -1412,8 +1412,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) sdata->vif.type != NL80211_IFTYPE_STATION); } - set_bit(SDATA_STATE_RUNNING, &sdata->state); - switch (sdata->vif.type) { case NL80211_IFTYPE_P2P_DEVICE: rcu_assign_pointer(local->p2p_sdata, sdata); @@ -1472,6 +1470,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } + set_bit(SDATA_STATE_RUNNING, &sdata->state); + return 0; err_del_interface: drv_remove_interface(local, sdata); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 54b8d5065bbd..d8484cd870de 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4409,8 +4409,11 @@ ieee80211_verify_peer_he_mcs_support(struct ieee80211_sub_if_data *sdata, he_cap_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ies->data, ies->len); + if (!he_cap_elem) + return false; + /* invalid HE IE */ - if (!he_cap_elem || he_cap_elem->datalen < 1 + sizeof(*he_cap)) { + if (he_cap_elem->datalen < 1 + sizeof(*he_cap)) { sdata_info(sdata, "Invalid HE elem, Disable HE\n"); return false; @@ -4676,8 +4679,6 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, } if (!elems->vht_cap_elem) { - sdata_info(sdata, - "bad VHT capabilities, disabling VHT\n"); *conn_flags |= IEEE80211_CONN_DISABLE_VHT; vht_oper = NULL; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index bd215fe3c796..f99416d2e144 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1978,10 +1978,11 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (mmie_keyidx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS || mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + - NUM_DEFAULT_BEACON_KEYS) { - cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, - skb->data, - skb->len); + NUM_DEFAULT_BEACON_KEYS) { + if (rx->sdata->dev) + cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, + skb->data, + skb->len); return RX_DROP_MONITOR; /* unexpected BIP keyidx */ } @@ -2131,7 +2132,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* either the frame has been decrypted or will be dropped */ status->flag |= RX_FLAG_DECRYPTED; - if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE)) + if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE && + rx->sdata->dev)) cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, skb->data, skb->len); @@ -4352,6 +4354,7 @@ void ieee80211_check_fast_rx(struct sta_info *sta) .vif_type = sdata->vif.type, .control_port_protocol = sdata->control_port_protocol, }, *old, *new = NULL; + u32 offload_flags; bool set_offload = false; bool assign = false; bool offload; @@ -4467,10 +4470,10 @@ void ieee80211_check_fast_rx(struct sta_info *sta) if (assign) new = kmemdup(&fastrx, sizeof(fastrx), GFP_KERNEL); - offload = assign && - (sdata->vif.offload_flags & IEEE80211_OFFLOAD_DECAP_ENABLED); + offload_flags = get_bss_sdata(sdata)->vif.offload_flags; + offload = offload_flags & IEEE80211_OFFLOAD_DECAP_ENABLED; - if (offload) + if (assign && offload) set_offload = !test_and_set_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD); else set_offload = test_and_clear_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD); @@ -4708,7 +4711,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, if (!(status->rx_flags & IEEE80211_RX_AMSDU)) { if (!pskb_may_pull(skb, snap_offs + sizeof(*payload))) - goto drop; + return false; payload = (void *)(skb->data + snap_offs); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 27c964be102e..a364148149f9 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2319,6 +2319,10 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, u16 len_rthdr; int hdrlen; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (unlikely(!ieee80211_sdata_running(sdata))) + goto fail; + memset(info, 0, sizeof(*info)); info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_CTL_INJECTED; @@ -2378,8 +2382,6 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, * This is necessary, for example, for old hostapd versions that * don't use nl80211-based management TX/RX. */ - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - list_for_each_entry_rcu(tmp_sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(tmp_sdata)) continue; @@ -4169,7 +4171,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct sk_buff *next; int len = skb->len; - if (unlikely(skb->len < ETH_HLEN)) { + if (unlikely(!ieee80211_sdata_running(sdata) || skb->len < ETH_HLEN)) { kfree_skb(skb); return; } @@ -4566,7 +4568,7 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb, struct ieee80211_key *key; struct sta_info *sta; - if (unlikely(skb->len < ETH_HLEN)) { + if (unlikely(!ieee80211_sdata_running(sdata) || skb->len < ETH_HLEN)) { kfree_skb(skb); return NETDEV_TX_OK; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index bf7461c41bef..b512cb37aafb 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1445,6 +1445,8 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, start, len) { if (elem->datalen < 2) continue; + if (elem->data[0] < 1 || elem->data[0] > 8) + continue; for_each_element(sub, elem->data + 1, elem->datalen - 1) { u8 new_bssid[ETH_ALEN]; @@ -1504,24 +1506,26 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) const struct element *non_inherit = NULL; u8 *nontransmitted_profile; int nontransmitted_profile_len = 0; + size_t scratch_len = params->len; - elems = kzalloc(sizeof(*elems), GFP_ATOMIC); + elems = kzalloc(sizeof(*elems) + scratch_len, GFP_ATOMIC); if (!elems) return NULL; elems->ie_start = params->start; elems->total_len = params->len; - - nontransmitted_profile = kmalloc(params->len, GFP_ATOMIC); - if (nontransmitted_profile) { - nontransmitted_profile_len = - ieee802_11_find_bssid_profile(params->start, params->len, - elems, params->bss, - nontransmitted_profile); - non_inherit = - cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - nontransmitted_profile, - nontransmitted_profile_len); - } + elems->scratch_len = scratch_len; + elems->scratch_pos = elems->scratch; + + nontransmitted_profile = elems->scratch_pos; + nontransmitted_profile_len = + ieee802_11_find_bssid_profile(params->start, params->len, + elems, params->bss, + nontransmitted_profile); + elems->scratch_pos += nontransmitted_profile_len; + elems->scratch_len -= nontransmitted_profile_len; + non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + nontransmitted_profile, + nontransmitted_profile_len); elems->crc = _ieee802_11_parse_elems_full(params, elems, non_inherit); @@ -1555,8 +1559,6 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) offsetofend(struct ieee80211_bssid_index, dtim_count)) elems->dtim_count = elems->bssid_index->dtim_count; - kfree(nontransmitted_profile); - return elems; } @@ -2046,7 +2048,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, if (he_cap) { enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif); - __le16 cap = ieee80211_get_he_6ghz_capa(sband, iftype); + __le16 cap = ieee80211_get_he_6ghz_capa(sband6, iftype); pos = ieee80211_write_he_6ghz_cap(pos, cap, end); } diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index c2fc2a7b2528..b6b5e496fa40 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -295,11 +295,12 @@ __must_hold(&net->mctp.keys_lock) mctp_dev_release_key(key->dev, key); spin_unlock_irqrestore(&key->lock, flags); - hlist_del(&key->hlist); - hlist_del(&key->sklist); - - /* unref for the lists */ - mctp_key_unref(key); + if (!hlist_unhashed(&key->hlist)) { + hlist_del_init(&key->hlist); + hlist_del_init(&key->sklist); + /* unref for the lists */ + mctp_key_unref(key); + } kfree_skb(skb); } @@ -373,9 +374,17 @@ static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg) ctl.tag = tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC; if (copy_to_user((void __user *)arg, &ctl, sizeof(ctl))) { - spin_lock_irqsave(&key->lock, flags); - __mctp_key_remove(key, net, flags, MCTP_TRACE_KEY_DROPPED); + unsigned long fl2; + /* Unwind our key allocation: the keys list lock needs to be + * taken before the individual key locks, and we need a valid + * flags value (fl2) to pass to __mctp_key_remove, hence the + * second spin_lock_irqsave() rather than a plain spin_lock(). + */ + spin_lock_irqsave(&net->mctp.keys_lock, flags); + spin_lock_irqsave(&key->lock, fl2); + __mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_DROPPED); mctp_key_unref(key); + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); return -EFAULT; } diff --git a/net/mctp/route.c b/net/mctp/route.c index 3b24b8d18b5b..2155f15a074c 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -228,12 +228,12 @@ __releases(&key->lock) if (!key->manual_alloc) { spin_lock_irqsave(&net->mctp.keys_lock, flags); - hlist_del(&key->hlist); - hlist_del(&key->sklist); + if (!hlist_unhashed(&key->hlist)) { + hlist_del_init(&key->hlist); + hlist_del_init(&key->sklist); + mctp_key_unref(key); + } spin_unlock_irqrestore(&net->mctp.keys_lock, flags); - - /* unref for the lists */ - mctp_key_unref(key); } /* and one for the local reference */ diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index a7de29137618..49a5348a6a14 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -40,16 +40,17 @@ static noinline bool nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo *pkt, u32 level) { struct cgroup *cgrp; + u64 cgid; if (!sk_fullsock(sk)) return false; - cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - if (level > cgrp->level) + cgrp = cgroup_ancestor(sock_cgroup_ptr(&sk->sk_cgrp_data), level); + if (!cgrp) return false; - memcpy(dest, &cgrp->ancestor_ids[level], sizeof(u64)); - + cgid = cgroup_id(cgrp); + memcpy(dest, &cgid, sizeof(u64)); return true; } #endif diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index cb255d8ed99a..c7b10234cf7c 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1015,7 +1015,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, * connections which we will commit, we may need to attach * the helper here. */ - if (info->commit && info->helper && !nfct_help(ct)) { + if (!nf_ct_is_confirmed(ct) && info->commit && + info->helper && !nfct_help(ct)) { int err = __nf_ct_try_assign_helper(ct, info->ct, GFP_ATOMIC); if (err) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 435d866fcfa0..570389f6cdd7 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2043,14 +2043,12 @@ start_error: static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) { - struct taprio_sched *q = qdisc_priv(sch); - struct net_device *dev = qdisc_dev(sch); - unsigned int ntx = cl - 1; + struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); - if (ntx >= dev->num_tx_queues) + if (!dev_queue) return NULL; - return q->qdiscs[ntx]; + return dev_queue->qdisc_sleeping; } static unsigned long taprio_find(struct Qdisc *sch, u32 classid) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c284efa3d1ef..993acf38af87 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -345,7 +345,7 @@ static int rpc_alloc_clid(struct rpc_clnt *clnt) { int clid; - clid = ida_simple_get(&rpc_clids, 0, 0, GFP_KERNEL); + clid = ida_alloc(&rpc_clids, GFP_KERNEL); if (clid < 0) return clid; clnt->cl_clid = clid; @@ -354,7 +354,7 @@ static int rpc_alloc_clid(struct rpc_clnt *clnt) static void rpc_free_clid(struct rpc_clnt *clnt) { - ida_simple_remove(&rpc_clids, clnt->cl_clid); + ida_free(&rpc_clids, clnt->cl_clid); } static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, @@ -873,6 +873,57 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) } EXPORT_SYMBOL_GPL(rpc_killall_tasks); +/** + * rpc_cancel_tasks - try to cancel a set of RPC tasks + * @clnt: Pointer to RPC client + * @error: RPC task error value to set + * @fnmatch: Pointer to selector function + * @data: User data + * + * Uses @fnmatch to define a set of RPC tasks that are to be cancelled. + * The argument @error must be a negative error value. + */ +unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error, + bool (*fnmatch)(const struct rpc_task *, + const void *), + const void *data) +{ + struct rpc_task *task; + unsigned long count = 0; + + if (list_empty(&clnt->cl_tasks)) + return 0; + /* + * Spin lock all_tasks to prevent changes... + */ + spin_lock(&clnt->cl_lock); + list_for_each_entry(task, &clnt->cl_tasks, tk_task) { + if (!RPC_IS_ACTIVATED(task)) + continue; + if (!fnmatch(task, data)) + continue; + rpc_task_try_cancel(task, error); + count++; + } + spin_unlock(&clnt->cl_lock); + return count; +} +EXPORT_SYMBOL_GPL(rpc_cancel_tasks); + +static int rpc_clnt_disconnect_xprt(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, void *dummy) +{ + if (xprt_connected(xprt)) + xprt_force_disconnect(xprt); + return 0; +} + +void rpc_clnt_disconnect(struct rpc_clnt *clnt) +{ + rpc_clnt_iterate_for_each_xprt(clnt, rpc_clnt_disconnect_xprt, NULL); +} +EXPORT_SYMBOL_GPL(rpc_clnt_disconnect); + /* * Properly shut down an RPC client, terminating all outstanding * requests. @@ -1642,7 +1693,7 @@ static void __rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status) { trace_rpc_call_rpcerror(task, tk_status, rpc_status); - task->tk_rpc_status = rpc_status; + rpc_task_set_rpc_status(task, rpc_status); rpc_exit(task, tk_status); } @@ -2435,10 +2486,8 @@ rpc_check_timeout(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - if (RPC_SIGNALLED(task)) { - rpc_call_rpcerror(task, -ERESTARTSYS); + if (RPC_SIGNALLED(task)) return; - } if (xprt_adjust_timeout(task->tk_rqstp) == 0) return; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 25b9221950ff..be587a308e05 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -65,6 +65,13 @@ gfp_t rpc_task_gfp_mask(void) } EXPORT_SYMBOL_GPL(rpc_task_gfp_mask); +bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status) +{ + if (cmpxchg(&task->tk_rpc_status, 0, rpc_status) == 0) + return true; + return false; +} + unsigned long rpc_task_timeout(const struct rpc_task *task) { @@ -269,7 +276,7 @@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode) { - freezable_schedule_unsafe(); + schedule(); if (signal_pending_state(mode, current)) return -ERESTARTSYS; return 0; @@ -333,14 +340,12 @@ static int rpc_complete_task(struct rpc_task *task) * to enforce taking of the wq->lock and hence avoid races with * rpc_complete_task(). */ -int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action) +int rpc_wait_for_completion_task(struct rpc_task *task) { - if (action == NULL) - action = rpc_wait_bit_killable; return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, - action, TASK_KILLABLE); + rpc_wait_bit_killable, TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); } -EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); +EXPORT_SYMBOL_GPL(rpc_wait_for_completion_task); /* * Make an RPC task runnable. @@ -855,12 +860,25 @@ void rpc_signal_task(struct rpc_task *task) if (!RPC_IS_ACTIVATED(task)) return; + if (!rpc_task_set_rpc_status(task, -ERESTARTSYS)) + return; trace_rpc_task_signalled(task, task->tk_action); set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); smp_mb__after_atomic(); queue = READ_ONCE(task->tk_waitqueue); if (queue) - rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS); + rpc_wake_up_queued_task(queue, task); +} + +void rpc_task_try_cancel(struct rpc_task *task, int error) +{ + struct rpc_wait_queue *queue; + + if (!rpc_task_set_rpc_status(task, error)) + return; + queue = READ_ONCE(task->tk_waitqueue); + if (queue) + rpc_wake_up_queued_task(queue, task); } void rpc_exit(struct rpc_task *task, int status) @@ -907,10 +925,16 @@ static void __rpc_execute(struct rpc_task *task) * Perform the next FSM step or a pending callback. * * tk_action may be NULL if the task has been killed. - * In particular, note that rpc_killall_tasks may - * do this at any time, so beware when dereferencing. */ do_action = task->tk_action; + /* Tasks with an RPC error status should exit */ + if (do_action != rpc_exit_task && + (status = READ_ONCE(task->tk_rpc_status)) != 0) { + task->tk_status = status; + if (do_action != NULL) + do_action = rpc_exit_task; + } + /* Callbacks override all actions */ if (task->tk_callback) { do_action = task->tk_callback; task->tk_callback = NULL; @@ -933,14 +957,6 @@ static void __rpc_execute(struct rpc_task *task) } /* - * Signalled tasks should exit rather than sleep. - */ - if (RPC_SIGNALLED(task)) { - task->tk_rpc_status = -ERESTARTSYS; - rpc_exit(task, -ERESTARTSYS); - } - - /* * The queue->lock protects against races with * rpc_make_runnable(). * @@ -955,6 +971,12 @@ static void __rpc_execute(struct rpc_task *task) spin_unlock(&queue->lock); continue; } + /* Wake up any task that has an exit status */ + if (READ_ONCE(task->tk_rpc_status) != 0) { + rpc_wake_up_task_queue_locked(queue, task); + spin_unlock(&queue->lock); + continue; + } rpc_clear_running(task); spin_unlock(&queue->lock); if (task_is_async) @@ -964,7 +986,7 @@ static void __rpc_execute(struct rpc_task *task) trace_rpc_task_sync_sleep(task, task->tk_action); status = out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_QUEUED, rpc_wait_bit_killable, - TASK_KILLABLE); + TASK_KILLABLE|TASK_FREEZABLE); if (status < 0) { /* * When a sync task receives a signal, it exits with @@ -972,10 +994,7 @@ static void __rpc_execute(struct rpc_task *task) * clean up after sleeping on some queue, we don't * break the loop here, but go around once more. */ - trace_rpc_task_signalled(task, task->tk_action); - set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); - task->tk_rpc_status = -ERESTARTSYS; - rpc_exit(task, -ERESTARTSYS); + rpc_signal_task(task); } trace_rpc_task_sync_wake(task, task->tk_action); } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index f8fae7815649..71dc26373444 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1788,7 +1788,7 @@ static int xprt_alloc_id(struct rpc_xprt *xprt) { int id; - id = ida_simple_get(&rpc_xprt_ids, 0, 0, GFP_KERNEL); + id = ida_alloc(&rpc_xprt_ids, GFP_KERNEL); if (id < 0) return id; @@ -1798,7 +1798,7 @@ static int xprt_alloc_id(struct rpc_xprt *xprt) static void xprt_free_id(struct rpc_xprt *xprt) { - ida_simple_remove(&rpc_xprt_ids, xprt->id); + ida_free(&rpc_xprt_ids, xprt->id); } struct rpc_xprt *xprt_alloc(struct net *net, size_t size, @@ -1822,10 +1822,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size, goto out_free; list_add(&req->rq_list, &xprt->free); } - if (max_alloc > num_prealloc) - xprt->max_reqs = max_alloc; - else - xprt->max_reqs = num_prealloc; + xprt->max_reqs = max_t(unsigned int, max_alloc, num_prealloc); xprt->min_reqs = num_prealloc; xprt->num_reqs = num_prealloc; diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 685db598acbe..701250b305db 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -103,7 +103,7 @@ static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags) { int id; - id = ida_simple_get(&rpc_xprtswitch_ids, 0, 0, gfp_flags); + id = ida_alloc(&rpc_xprtswitch_ids, gfp_flags); if (id < 0) return id; @@ -113,7 +113,7 @@ static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags) static void xprt_switch_free_id(struct rpc_xprt_switch *xps) { - ida_simple_remove(&rpc_xprtswitch_ids, xps->xps_id); + ida_free(&rpc_xprtswitch_ids, xps->xps_id); } /** diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index faba7136dd9a..e4d84a13c566 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -189,7 +189,7 @@ create_req: return NULL; size = min_t(size_t, r_xprt->rx_ep->re_inline_recv, PAGE_SIZE); - req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL); + req = rpcrdma_req_create(r_xprt, size); if (!req) return NULL; if (rpcrdma_req_setup(r_xprt, req)) { diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index de0bdb6b729f..ffbf99894970 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -124,16 +124,16 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) unsigned int depth = ep->re_max_fr_depth; struct scatterlist *sg; struct ib_mr *frmr; - int rc; + + sg = kcalloc_node(depth, sizeof(*sg), XPRTRDMA_GFP_FLAGS, + ibdev_to_node(ep->re_id->device)); + if (!sg) + return -ENOMEM; frmr = ib_alloc_mr(ep->re_pd, ep->re_mrtype, depth); if (IS_ERR(frmr)) goto out_mr_err; - sg = kmalloc_array(depth, sizeof(*sg), GFP_KERNEL); - if (!sg) - goto out_list_err; - mr->mr_xprt = r_xprt; mr->mr_ibmr = frmr; mr->mr_device = NULL; @@ -146,13 +146,9 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) return 0; out_mr_err: - rc = PTR_ERR(frmr); - trace_xprtrdma_frwr_alloc(mr, rc); - return rc; - -out_list_err: - ib_dereg_mr(frmr); - return -ENOMEM; + kfree(sg); + trace_xprtrdma_frwr_alloc(mr, PTR_ERR(frmr)); + return PTR_ERR(frmr); } /** diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 85c8cdda98b1..aa2227a7e552 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -119,12 +119,12 @@ xprt_rdma_bc_allocate(struct rpc_task *task) return -EINVAL; } - page = alloc_page(RPCRDMA_DEF_GFP); + page = alloc_page(GFP_NOIO | __GFP_NOWARN); if (!page) return -ENOMEM; rqst->rq_buffer = page_address(page); - rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, RPCRDMA_DEF_GFP); + rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, GFP_NOIO | __GFP_NOWARN); if (!rqst->rq_rbuffer) { put_page(page); return -ENOMEM; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index bcb37b51adf6..10bb2b929c6d 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -494,8 +494,7 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO); } trace_xprtrdma_op_connect(r_xprt, delay); - queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker, - delay); + queue_delayed_work(system_long_wq, &r_xprt->rx_connect_worker, delay); } /** diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2fbe9aaeec34..44b87e4274b4 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -76,8 +76,7 @@ static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); static void rpcrdma_ep_get(struct rpcrdma_ep *ep); static int rpcrdma_ep_put(struct rpcrdma_ep *ep); static struct rpcrdma_regbuf * -rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, - gfp_t flags); +rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction); static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); @@ -373,7 +372,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ep *ep; int rc; - ep = kzalloc(sizeof(*ep), GFP_KERNEL); + ep = kzalloc(sizeof(*ep), XPRTRDMA_GFP_FLAGS); if (!ep) return -ENOTCONN; ep->re_xprt = &r_xprt->rx_xprt; @@ -606,7 +605,7 @@ static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep) struct rpcrdma_sendctx *sc; sc = kzalloc(struct_size(sc, sc_sges, ep->re_attr.cap.max_send_sge), - GFP_KERNEL); + XPRTRDMA_GFP_FLAGS); if (!sc) return NULL; @@ -629,7 +628,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) * Sends are posted. */ i = r_xprt->rx_ep->re_max_requests + RPCRDMA_MAX_BC_REQUESTS; - buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL); + buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), XPRTRDMA_GFP_FLAGS); if (!buf->rb_sc_ctxs) return -ENOMEM; @@ -740,13 +739,16 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_ep *ep = r_xprt->rx_ep; + struct ib_device *device = ep->re_id->device; unsigned int count; + /* Try to allocate enough to perform one full-sized I/O */ for (count = 0; count < ep->re_max_rdma_segs; count++) { struct rpcrdma_mr *mr; int rc; - mr = kzalloc(sizeof(*mr), GFP_KERNEL); + mr = kzalloc_node(sizeof(*mr), XPRTRDMA_GFP_FLAGS, + ibdev_to_node(device)); if (!mr) break; @@ -791,38 +793,33 @@ void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) /* If there is no underlying connection, it's no use * to wake the refresh worker. */ - if (ep->re_connect_status == 1) { - /* The work is scheduled on a WQ_MEM_RECLAIM - * workqueue in order to prevent MR allocation - * from recursing into NFS during direct reclaim. - */ - queue_work(xprtiod_workqueue, &buf->rb_refresh_worker); - } + if (ep->re_connect_status != 1) + return; + queue_work(system_highpri_wq, &buf->rb_refresh_worker); } /** * rpcrdma_req_create - Allocate an rpcrdma_req object * @r_xprt: controlling r_xprt * @size: initial size, in bytes, of send and receive buffers - * @flags: GFP flags passed to memory allocators * * Returns an allocated and fully initialized rpcrdma_req or NULL. */ -struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, - gfp_t flags) +struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, + size_t size) { struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; struct rpcrdma_req *req; - req = kzalloc(sizeof(*req), flags); + req = kzalloc(sizeof(*req), XPRTRDMA_GFP_FLAGS); if (req == NULL) goto out1; - req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags); + req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE); if (!req->rl_sendbuf) goto out2; - req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags); + req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE); if (!req->rl_recvbuf) goto out3; @@ -858,7 +855,7 @@ int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) r_xprt->rx_ep->re_max_rdma_segs * rpcrdma_readchunk_maxsz; maxhdrsize *= sizeof(__be32); rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), - DMA_TO_DEVICE, GFP_KERNEL); + DMA_TO_DEVICE); if (!rb) goto out; @@ -929,12 +926,12 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_rep *rep; - rep = kzalloc(sizeof(*rep), GFP_KERNEL); + rep = kzalloc(sizeof(*rep), XPRTRDMA_GFP_FLAGS); if (rep == NULL) goto out; rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv, - DMA_FROM_DEVICE, GFP_KERNEL); + DMA_FROM_DEVICE); if (!rep->rr_rdmabuf) goto out_free; @@ -1064,8 +1061,8 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) { struct rpcrdma_req *req; - req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2, - GFP_KERNEL); + req = rpcrdma_req_create(r_xprt, + RPCRDMA_V1_DEF_INLINE_SIZE * 2); if (!req) goto out; list_add(&req->rl_list, &buf->rb_send_bufs); @@ -1235,15 +1232,14 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) * or Replies they may be registered externally via frwr_map. */ static struct rpcrdma_regbuf * -rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, - gfp_t flags) +rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction) { struct rpcrdma_regbuf *rb; - rb = kmalloc(sizeof(*rb), flags); + rb = kmalloc(sizeof(*rb), XPRTRDMA_GFP_FLAGS); if (!rb) return NULL; - rb->rg_data = kmalloc(size, flags); + rb->rg_data = kmalloc(size, XPRTRDMA_GFP_FLAGS); if (!rb->rg_data) { kfree(rb); return NULL; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c79f92eeda76..5e5ff6784ef5 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -149,7 +149,11 @@ static inline void *rdmab_data(const struct rpcrdma_regbuf *rb) return rb->rg_data; } -#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) +/* Do not use emergency memory reserves, and fail quickly if memory + * cannot be allocated easily. These flags may be used wherever there + * is robust logic to handle a failure to allocate. + */ +#define XPRTRDMA_GFP_FLAGS (__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) /* To ensure a transport can always make forward progress, * the number of RDMA segments allowed in header chunk lists @@ -467,8 +471,8 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp); /* * Buffer calls - xprtrdma/verbs.c */ -struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, - gfp_t flags); +struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, + size_t size); int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void rpcrdma_req_destroy(struct rpcrdma_req *req); int rpcrdma_buffer_create(struct rpcrdma_xprt *); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e976007f4fd0..f34d5427b66c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -261,7 +261,7 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) switch (sap->sa_family) { case AF_LOCAL: sun = xs_addr_un(xprt); - strlcpy(buf, sun->sun_path, sizeof(buf)); + strscpy(buf, sun->sun_path, sizeof(buf)); xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); break; @@ -1978,8 +1978,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) * we'll need to figure out how to pass a namespace to * connect. */ - task->tk_rpc_status = -ENOTCONN; - rpc_exit(task, -ENOTCONN); + rpc_task_set_rpc_status(task, -ENOTCONN); goto out_wake; } ret = xs_local_setup_socket(transport); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0f08c3177872..15dbb392c875 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2560,13 +2560,14 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, struct sk_buff *last, unsigned int last_len, bool freezable) { + unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE; struct sk_buff *tail; DEFINE_WAIT(wait); unix_state_lock(sk); for (;;) { - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, state); tail = skb_peek_tail(&sk->sk_receive_queue); if (tail != last || @@ -2579,10 +2580,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); unix_state_unlock(sk); - if (freezable) - timeo = freezable_schedule_timeout(timeo); - else - timeo = schedule_timeout(timeo); + timeo = schedule_timeout(timeo); unix_state_lock(sk); if (sock_flag(sk, SOCK_DEAD)) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8ff8b1c040f0..597c52236514 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13265,7 +13265,9 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, wake_mask_size); if (tok) { cfg->tokens_size = tokens_size; - memcpy(&cfg->payload_tok, tok, sizeof(*tok) + tokens_size); + cfg->payload_tok = *tok; + memcpy(cfg->payload_tok.token_stream, tok->token_stream, + tokens_size); } trig->tcp = cfg; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 5382fc2003db..806a5f1330ff 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -143,18 +143,12 @@ static inline void bss_ref_get(struct cfg80211_registered_device *rdev, lockdep_assert_held(&rdev->bss_lock); bss->refcount++; - if (bss->pub.hidden_beacon_bss) { - bss = container_of(bss->pub.hidden_beacon_bss, - struct cfg80211_internal_bss, - pub); - bss->refcount++; - } - if (bss->pub.transmitted_bss) { - bss = container_of(bss->pub.transmitted_bss, - struct cfg80211_internal_bss, - pub); - bss->refcount++; - } + + if (bss->pub.hidden_beacon_bss) + bss_from_pub(bss->pub.hidden_beacon_bss)->refcount++; + + if (bss->pub.transmitted_bss) + bss_from_pub(bss->pub.transmitted_bss)->refcount++; } static inline void bss_ref_put(struct cfg80211_registered_device *rdev, @@ -304,7 +298,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, tmp_old = cfg80211_find_ie(WLAN_EID_SSID, ie, ielen); tmp_old = (tmp_old) ? tmp_old + tmp_old[1] + 2 : ie; - while (tmp_old + tmp_old[1] + 2 - ie <= ielen) { + while (tmp_old + 2 - ie <= ielen && + tmp_old + tmp_old[1] + 2 - ie <= ielen) { if (tmp_old[0] == 0) { tmp_old++; continue; @@ -364,7 +359,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, * copied to new ie, skip ssid, capability, bssid-index ie */ tmp_new = sub_copy; - while (tmp_new + tmp_new[1] + 2 - sub_copy <= subie_len) { + while (tmp_new + 2 - sub_copy <= subie_len && + tmp_new + tmp_new[1] + 2 - sub_copy <= subie_len) { if (!(tmp_new[0] == WLAN_EID_NON_TX_BSSID_CAP || tmp_new[0] == WLAN_EID_SSID)) { memcpy(pos, tmp_new, tmp_new[1] + 2); @@ -427,6 +423,15 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss, rcu_read_unlock(); + /* + * This is a bit weird - it's not on the list, but already on another + * one! The only way that could happen is if there's some BSSID/SSID + * shared by multiple APs in their multi-BSSID profiles, potentially + * with hidden SSID mixed in ... ignore it. + */ + if (!list_empty(&nontrans_bss->nontrans_list)) + return -EINVAL; + /* add to the list */ list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list); return 0; @@ -1602,6 +1607,23 @@ struct cfg80211_non_tx_bss { u8 bssid_index; }; +static void cfg80211_update_hidden_bsses(struct cfg80211_internal_bss *known, + const struct cfg80211_bss_ies *new_ies, + const struct cfg80211_bss_ies *old_ies) +{ + struct cfg80211_internal_bss *bss; + + /* Assign beacon IEs to all sub entries */ + list_for_each_entry(bss, &known->hidden_list, hidden_list) { + const struct cfg80211_bss_ies *ies; + + ies = rcu_access_pointer(bss->pub.beacon_ies); + WARN_ON(ies != old_ies); + + rcu_assign_pointer(bss->pub.beacon_ies, new_ies); + } +} + static bool cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *known, @@ -1625,7 +1647,6 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); } else if (rcu_access_pointer(new->pub.beacon_ies)) { const struct cfg80211_bss_ies *old; - struct cfg80211_internal_bss *bss; if (known->pub.hidden_beacon_bss && !list_empty(&known->hidden_list)) { @@ -1653,16 +1674,7 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, if (old == rcu_access_pointer(known->pub.ies)) rcu_assign_pointer(known->pub.ies, new->pub.beacon_ies); - /* Assign beacon IEs to all sub entries */ - list_for_each_entry(bss, &known->hidden_list, hidden_list) { - const struct cfg80211_bss_ies *ies; - - ies = rcu_access_pointer(bss->pub.beacon_ies); - WARN_ON(ies != old); - - rcu_assign_pointer(bss->pub.beacon_ies, - new->pub.beacon_ies); - } + cfg80211_update_hidden_bsses(known, new->pub.beacon_ies, old); if (old) kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); @@ -1739,6 +1751,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, new->refcount = 1; INIT_LIST_HEAD(&new->hidden_list); INIT_LIST_HEAD(&new->pub.nontrans_list); + /* we'll set this later if it was non-NULL */ + new->pub.transmitted_bss = NULL; if (rcu_access_pointer(tmp->pub.proberesp_ies)) { hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_ZLEN); @@ -2021,10 +2035,15 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, spin_lock_bh(&rdev->bss_lock); if (cfg80211_add_nontrans_list(non_tx_data->tx_bss, &res->pub)) { - if (__cfg80211_unlink_bss(rdev, res)) + if (__cfg80211_unlink_bss(rdev, res)) { rdev->bss_generation++; + res = NULL; + } } spin_unlock_bh(&rdev->bss_lock); + + if (!res) + return NULL; } trace_cfg80211_return_bss(&res->pub); @@ -2143,6 +2162,8 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, ie, ielen) { if (elem->datalen < 4) continue; + if (elem->data[0] < 1 || (int)elem->data[0] > 8) + continue; for_each_element(sub, elem->data + 1, elem->datalen - 1) { u8 profile_len; @@ -2279,7 +2300,7 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, size_t new_ie_len; struct cfg80211_bss_ies *new_ies; const struct cfg80211_bss_ies *old; - u8 cpy_len; + size_t cpy_len; lockdep_assert_held(&wiphy_to_rdev(wiphy)->bss_lock); @@ -2346,6 +2367,8 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, } else { old = rcu_access_pointer(nontrans_bss->beacon_ies); rcu_assign_pointer(nontrans_bss->beacon_ies, new_ies); + cfg80211_update_hidden_bsses(bss_from_pub(nontrans_bss), + new_ies, old); rcu_assign_pointer(nontrans_bss->ies, new_ies); if (old) kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); diff --git a/net/wireless/util.c b/net/wireless/util.c index 01493568a21d..1f285b515028 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -559,7 +559,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, return -1; hdrlen = ieee80211_hdrlen(hdr->frame_control) + data_offset; - if (skb->len < hdrlen + 8) + if (skb->len < hdrlen) return -1; /* convert IEEE 802.11 header + possible LLC headers into Ethernet @@ -574,8 +574,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, memcpy(tmp.h_dest, ieee80211_get_DA(hdr), ETH_ALEN); memcpy(tmp.h_source, ieee80211_get_SA(hdr), ETH_ALEN); - if (iftype == NL80211_IFTYPE_MESH_POINT) - skb_copy_bits(skb, hdrlen, &mesh_flags, 1); + if (iftype == NL80211_IFTYPE_MESH_POINT && + skb_copy_bits(skb, hdrlen, &mesh_flags, 1) < 0) + return -1; mesh_flags &= MESH_FLAGS_AE; @@ -595,11 +596,12 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, if (iftype == NL80211_IFTYPE_MESH_POINT) { if (mesh_flags == MESH_FLAGS_AE_A4) return -1; - if (mesh_flags == MESH_FLAGS_AE_A5_A6) { - skb_copy_bits(skb, hdrlen + - offsetof(struct ieee80211s_hdr, eaddr1), - tmp.h_dest, 2 * ETH_ALEN); - } + if (mesh_flags == MESH_FLAGS_AE_A5_A6 && + skb_copy_bits(skb, hdrlen + + offsetof(struct ieee80211s_hdr, eaddr1), + tmp.h_dest, 2 * ETH_ALEN) < 0) + return -1; + hdrlen += __ieee80211_get_mesh_hdrlen(mesh_flags); } break; @@ -613,10 +615,11 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, if (iftype == NL80211_IFTYPE_MESH_POINT) { if (mesh_flags == MESH_FLAGS_AE_A5_A6) return -1; - if (mesh_flags == MESH_FLAGS_AE_A4) - skb_copy_bits(skb, hdrlen + - offsetof(struct ieee80211s_hdr, eaddr1), - tmp.h_source, ETH_ALEN); + if (mesh_flags == MESH_FLAGS_AE_A4 && + skb_copy_bits(skb, hdrlen + + offsetof(struct ieee80211s_hdr, eaddr1), + tmp.h_source, ETH_ALEN) < 0) + return -1; hdrlen += __ieee80211_get_mesh_hdrlen(mesh_flags); } break; @@ -628,16 +631,15 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, break; } - skb_copy_bits(skb, hdrlen, &payload, sizeof(payload)); - tmp.h_proto = payload.proto; - - if (likely((!is_amsdu && ether_addr_equal(payload.hdr, rfc1042_header) && - tmp.h_proto != htons(ETH_P_AARP) && - tmp.h_proto != htons(ETH_P_IPX)) || - ether_addr_equal(payload.hdr, bridge_tunnel_header))) { + if (likely(skb_copy_bits(skb, hdrlen, &payload, sizeof(payload)) == 0 && + ((!is_amsdu && ether_addr_equal(payload.hdr, rfc1042_header) && + payload.proto != htons(ETH_P_AARP) && + payload.proto != htons(ETH_P_IPX)) || + ether_addr_equal(payload.hdr, bridge_tunnel_header)))) { /* remove RFC1042 or Bridge-Tunnel encapsulation and * replace EtherType */ hdrlen += ETH_ALEN + 2; + tmp.h_proto = payload.proto; skb_postpull_rcsum(skb, &payload, ETH_ALEN + 2); } else { tmp.h_proto = htons(skb->len - hdrlen); diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 76a80a41615b..fe8765c4075d 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -468,6 +468,7 @@ void wireless_send_event(struct net_device * dev, struct __compat_iw_event *compat_event; struct compat_iw_point compat_wrqu; struct sk_buff *compskb; + int ptr_len; #endif /* @@ -582,6 +583,9 @@ void wireless_send_event(struct net_device * dev, nlmsg_end(skb, nlh); #ifdef CONFIG_COMPAT hdr_len = compat_event_type_size[descr->header_type]; + + /* ptr_len is remaining size in event header apart from LCP */ + ptr_len = hdr_len - IW_EV_COMPAT_LCP_LEN; event_len = hdr_len + extra_len; compskb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); @@ -612,16 +616,15 @@ void wireless_send_event(struct net_device * dev, if (descr->header_type == IW_HEADER_TYPE_POINT) { compat_wrqu.length = wrqu->data.length; compat_wrqu.flags = wrqu->data.flags; - memcpy(&compat_event->pointer, - ((char *) &compat_wrqu) + IW_EV_COMPAT_POINT_OFF, - hdr_len - IW_EV_COMPAT_LCP_LEN); + memcpy(compat_event->ptr_bytes, + ((char *)&compat_wrqu) + IW_EV_COMPAT_POINT_OFF, + ptr_len); if (extra_len) - memcpy(((char *) compat_event) + hdr_len, - extra, extra_len); + memcpy(&compat_event->ptr_bytes[ptr_len], + extra, extra_len); } else { /* extra_len must be zero, so no if (extra) needed */ - memcpy(&compat_event->pointer, wrqu, - hdr_len - IW_EV_COMPAT_LCP_LEN); + memcpy(compat_event->ptr_bytes, wrqu, ptr_len); } nlmsg_end(compskb, nlh); |