summaryrefslogtreecommitdiff
path: root/net/mctp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mctp')
-rw-r--r--net/mctp/Kconfig13
-rw-r--r--net/mctp/Makefile3
-rw-r--r--net/mctp/af_mctp.c395
-rw-r--r--net/mctp/device.c423
-rw-r--r--net/mctp/neigh.c342
-rw-r--r--net/mctp/route.c1116
6 files changed, 2292 insertions, 0 deletions
diff --git a/net/mctp/Kconfig b/net/mctp/Kconfig
new file mode 100644
index 000000000000..2cdf3d0a28c9
--- /dev/null
+++ b/net/mctp/Kconfig
@@ -0,0 +1,13 @@
+
+menuconfig MCTP
+ depends on NET
+ tristate "MCTP core protocol support"
+ help
+ Management Component Transport Protocol (MCTP) is an in-system
+ protocol for communicating between management controllers and
+ their managed devices (peripherals, host processors, etc.). The
+ protocol is defined by DMTF specification DSP0236.
+
+ This option enables core MCTP support. For communicating with other
+ devices, you'll want to enable a driver for a specific hardware
+ channel.
diff --git a/net/mctp/Makefile b/net/mctp/Makefile
new file mode 100644
index 000000000000..0171333384d7
--- /dev/null
+++ b/net/mctp/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_MCTP) += mctp.o
+mctp-objs := af_mctp.o device.o route.o neigh.o
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
new file mode 100644
index 000000000000..a9526ac29dff
--- /dev/null
+++ b/net/mctp/af_mctp.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/if_arp.h>
+#include <linux/net.h>
+#include <linux/mctp.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/sock.h>
+
+/* socket implementation */
+
+static int mctp_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+
+ if (sk) {
+ sock->sk = NULL;
+ sk->sk_prot->close(sk, 0);
+ }
+
+ return 0;
+}
+
+static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
+{
+ struct sock *sk = sock->sk;
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+ struct sockaddr_mctp *smctp;
+ int rc;
+
+ if (addrlen < sizeof(*smctp))
+ return -EINVAL;
+
+ if (addr->sa_family != AF_MCTP)
+ return -EAFNOSUPPORT;
+
+ if (!capable(CAP_NET_BIND_SERVICE))
+ return -EACCES;
+
+ /* it's a valid sockaddr for MCTP, cast and do protocol checks */
+ smctp = (struct sockaddr_mctp *)addr;
+
+ lock_sock(sk);
+
+ /* TODO: allow rebind */
+ if (sk_hashed(sk)) {
+ rc = -EADDRINUSE;
+ goto out_release;
+ }
+ msk->bind_net = smctp->smctp_network;
+ msk->bind_addr = smctp->smctp_addr.s_addr;
+ msk->bind_type = smctp->smctp_type & 0x7f; /* ignore the IC bit */
+
+ rc = sk->sk_prot->hash(sk);
+
+out_release:
+ release_sock(sk);
+
+ return rc;
+}
+
+static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+{
+ DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
+ const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr);
+ int rc, addrlen = msg->msg_namelen;
+ struct sock *sk = sock->sk;
+ struct mctp_skb_cb *cb;
+ struct mctp_route *rt;
+ struct sk_buff *skb;
+
+ if (addr) {
+ if (addrlen < sizeof(struct sockaddr_mctp))
+ return -EINVAL;
+ if (addr->smctp_family != AF_MCTP)
+ return -EINVAL;
+ if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER))
+ return -EINVAL;
+
+ } else {
+ /* TODO: connect()ed sockets */
+ return -EDESTADDRREQ;
+ }
+
+ if (!capable(CAP_NET_RAW))
+ return -EACCES;
+
+ if (addr->smctp_network == MCTP_NET_ANY)
+ addr->smctp_network = mctp_default_net(sock_net(sk));
+
+ rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
+ addr->smctp_addr.s_addr);
+ if (!rt)
+ return -EHOSTUNREACH;
+
+ skb = sock_alloc_send_skb(sk, hlen + 1 + len,
+ msg->msg_flags & MSG_DONTWAIT, &rc);
+ if (!skb)
+ return rc;
+
+ skb_reserve(skb, hlen);
+
+ /* set type as fist byte in payload */
+ *(u8 *)skb_put(skb, 1) = addr->smctp_type;
+
+ rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len);
+ if (rc < 0) {
+ kfree_skb(skb);
+ return rc;
+ }
+
+ /* set up cb */
+ cb = __mctp_cb(skb);
+ cb->net = addr->smctp_network;
+
+ rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr,
+ addr->smctp_tag);
+
+ return rc ? : len;
+}
+
+static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
+{
+ DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
+ struct sock *sk = sock->sk;
+ struct sk_buff *skb;
+ size_t msglen;
+ u8 type;
+ int rc;
+
+ if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
+ return -EOPNOTSUPP;
+
+ skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc);
+ if (!skb)
+ return rc;
+
+ if (!skb->len) {
+ rc = 0;
+ goto out_free;
+ }
+
+ /* extract message type, remove from data */
+ type = *((u8 *)skb->data);
+ msglen = skb->len - 1;
+
+ if (len < msglen)
+ msg->msg_flags |= MSG_TRUNC;
+ else
+ len = msglen;
+
+ rc = skb_copy_datagram_msg(skb, 1, msg, len);
+ if (rc < 0)
+ goto out_free;
+
+ sock_recv_ts_and_drops(msg, sk, skb);
+
+ if (addr) {
+ struct mctp_skb_cb *cb = mctp_cb(skb);
+ /* TODO: expand mctp_skb_cb for header fields? */
+ struct mctp_hdr *hdr = mctp_hdr(skb);
+
+ addr = msg->msg_name;
+ addr->smctp_family = AF_MCTP;
+ addr->smctp_network = cb->net;
+ addr->smctp_addr.s_addr = hdr->src;
+ addr->smctp_type = type;
+ addr->smctp_tag = hdr->flags_seq_tag &
+ (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+ msg->msg_namelen = sizeof(*addr);
+ }
+
+ rc = len;
+
+ if (flags & MSG_TRUNC)
+ rc = msglen;
+
+out_free:
+ skb_free_datagram(sk, skb);
+ return rc;
+}
+
+static int mctp_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
+{
+ return -EINVAL;
+}
+
+static int mctp_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ return -EINVAL;
+}
+
+static const struct proto_ops mctp_dgram_ops = {
+ .family = PF_MCTP,
+ .release = mctp_release,
+ .bind = mctp_bind,
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = datagram_poll,
+ .ioctl = sock_no_ioctl,
+ .gettstamp = sock_gettstamp,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = mctp_setsockopt,
+ .getsockopt = mctp_getsockopt,
+ .sendmsg = mctp_sendmsg,
+ .recvmsg = mctp_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+static int mctp_sk_init(struct sock *sk)
+{
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+
+ INIT_HLIST_HEAD(&msk->keys);
+ return 0;
+}
+
+static void mctp_sk_close(struct sock *sk, long timeout)
+{
+ sk_common_release(sk);
+}
+
+static int mctp_sk_hash(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+
+ mutex_lock(&net->mctp.bind_lock);
+ sk_add_node_rcu(sk, &net->mctp.binds);
+ mutex_unlock(&net->mctp.bind_lock);
+
+ return 0;
+}
+
+static void mctp_sk_unhash(struct sock *sk)
+{
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+ struct net *net = sock_net(sk);
+ struct mctp_sk_key *key;
+ struct hlist_node *tmp;
+ unsigned long flags;
+
+ /* remove from any type-based binds */
+ mutex_lock(&net->mctp.bind_lock);
+ sk_del_node_init_rcu(sk);
+ mutex_unlock(&net->mctp.bind_lock);
+
+ /* remove tag allocations */
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
+ hlist_del_rcu(&key->sklist);
+ hlist_del_rcu(&key->hlist);
+
+ spin_lock(&key->reasm_lock);
+ if (key->reasm_head)
+ kfree_skb(key->reasm_head);
+ key->reasm_head = NULL;
+ key->reasm_dead = true;
+ spin_unlock(&key->reasm_lock);
+
+ kfree_rcu(key, rcu);
+ }
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ synchronize_rcu();
+}
+
+static struct proto mctp_proto = {
+ .name = "MCTP",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct mctp_sock),
+ .init = mctp_sk_init,
+ .close = mctp_sk_close,
+ .hash = mctp_sk_hash,
+ .unhash = mctp_sk_unhash,
+};
+
+static int mctp_pf_create(struct net *net, struct socket *sock,
+ int protocol, int kern)
+{
+ const struct proto_ops *ops;
+ struct proto *proto;
+ struct sock *sk;
+ int rc;
+
+ if (protocol)
+ return -EPROTONOSUPPORT;
+
+ /* only datagram sockets are supported */
+ if (sock->type != SOCK_DGRAM)
+ return -ESOCKTNOSUPPORT;
+
+ proto = &mctp_proto;
+ ops = &mctp_dgram_ops;
+
+ sock->state = SS_UNCONNECTED;
+ sock->ops = ops;
+
+ sk = sk_alloc(net, PF_MCTP, GFP_KERNEL, proto, kern);
+ if (!sk)
+ return -ENOMEM;
+
+ sock_init_data(sock, sk);
+
+ rc = 0;
+ if (sk->sk_prot->init)
+ rc = sk->sk_prot->init(sk);
+
+ if (rc)
+ goto err_sk_put;
+
+ return 0;
+
+err_sk_put:
+ sock_orphan(sk);
+ sock_put(sk);
+ return rc;
+}
+
+static struct net_proto_family mctp_pf = {
+ .family = PF_MCTP,
+ .create = mctp_pf_create,
+ .owner = THIS_MODULE,
+};
+
+static __init int mctp_init(void)
+{
+ int rc;
+
+ /* ensure our uapi tag definitions match the header format */
+ BUILD_BUG_ON(MCTP_TAG_OWNER != MCTP_HDR_FLAG_TO);
+ BUILD_BUG_ON(MCTP_TAG_MASK != MCTP_HDR_TAG_MASK);
+
+ pr_info("mctp: management component transport protocol core\n");
+
+ rc = sock_register(&mctp_pf);
+ if (rc)
+ return rc;
+
+ rc = proto_register(&mctp_proto, 0);
+ if (rc)
+ goto err_unreg_sock;
+
+ rc = mctp_routes_init();
+ if (rc)
+ goto err_unreg_proto;
+
+ rc = mctp_neigh_init();
+ if (rc)
+ goto err_unreg_proto;
+
+ mctp_device_init();
+
+ return 0;
+
+err_unreg_proto:
+ proto_unregister(&mctp_proto);
+err_unreg_sock:
+ sock_unregister(PF_MCTP);
+
+ return rc;
+}
+
+static __exit void mctp_exit(void)
+{
+ mctp_device_exit();
+ mctp_neigh_exit();
+ mctp_routes_exit();
+ proto_unregister(&mctp_proto);
+ sock_unregister(PF_MCTP);
+}
+
+module_init(mctp_init);
+module_exit(mctp_exit);
+
+MODULE_DESCRIPTION("MCTP core");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jeremy Kerr <jk@codeconstruct.com.au>");
+
+MODULE_ALIAS_NETPROTO(PF_MCTP);
diff --git a/net/mctp/device.c b/net/mctp/device.c
new file mode 100644
index 000000000000..b9f38e765f61
--- /dev/null
+++ b/net/mctp/device.c
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - device implementation.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/if_link.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
+
+#include <net/addrconf.h>
+#include <net/netlink.h>
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/sock.h>
+
+struct mctp_dump_cb {
+ int h;
+ int idx;
+ size_t a_idx;
+};
+
+/* unlocked: caller must hold rcu_read_lock */
+struct mctp_dev *__mctp_dev_get(const struct net_device *dev)
+{
+ return rcu_dereference(dev->mctp_ptr);
+}
+
+struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev)
+{
+ return rtnl_dereference(dev->mctp_ptr);
+}
+
+static void mctp_dev_destroy(struct mctp_dev *mdev)
+{
+ struct net_device *dev = mdev->dev;
+
+ dev_put(dev);
+ kfree_rcu(mdev, rcu);
+}
+
+static int mctp_fill_addrinfo(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mctp_dev *mdev, mctp_eid_t eid)
+{
+ struct ifaddrmsg *hdr;
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RTM_NEWADDR, sizeof(*hdr), NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ hdr = nlmsg_data(nlh);
+ hdr->ifa_family = AF_MCTP;
+ hdr->ifa_prefixlen = 0;
+ hdr->ifa_flags = 0;
+ hdr->ifa_scope = 0;
+ hdr->ifa_index = mdev->dev->ifindex;
+
+ if (nla_put_u8(skb, IFA_LOCAL, eid))
+ goto cancel;
+
+ if (nla_put_u8(skb, IFA_ADDRESS, eid))
+ goto cancel;
+
+ nlmsg_end(skb, nlh);
+
+ return 0;
+
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int mctp_dump_dev_addrinfo(struct mctp_dev *mdev, struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct mctp_dump_cb *mcb = (void *)cb->ctx;
+ int rc = 0;
+
+ for (; mcb->a_idx < mdev->num_addrs; mcb->a_idx++) {
+ rc = mctp_fill_addrinfo(skb, cb, mdev, mdev->addrs[mcb->a_idx]);
+ if (rc < 0)
+ break;
+ }
+
+ return rc;
+}
+
+static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct mctp_dump_cb *mcb = (void *)cb->ctx;
+ struct net *net = sock_net(skb->sk);
+ struct hlist_head *head;
+ struct net_device *dev;
+ struct ifaddrmsg *hdr;
+ struct mctp_dev *mdev;
+ int ifindex;
+ int idx, rc;
+
+ hdr = nlmsg_data(cb->nlh);
+ // filter by ifindex if requested
+ ifindex = hdr->ifa_index;
+
+ rcu_read_lock();
+ for (; mcb->h < NETDEV_HASHENTRIES; mcb->h++, mcb->idx = 0) {
+ idx = 0;
+ head = &net->dev_index_head[mcb->h];
+ hlist_for_each_entry_rcu(dev, head, index_hlist) {
+ if (idx >= mcb->idx &&
+ (ifindex == 0 || ifindex == dev->ifindex)) {
+ mdev = __mctp_dev_get(dev);
+ if (mdev) {
+ rc = mctp_dump_dev_addrinfo(mdev,
+ skb, cb);
+ // Error indicates full buffer, this
+ // callback will get retried.
+ if (rc < 0)
+ goto out;
+ }
+ }
+ idx++;
+ // reset for next iteration
+ mcb->a_idx = 0;
+ }
+ }
+out:
+ rcu_read_unlock();
+ mcb->idx = idx;
+
+ return skb->len;
+}
+
+static const struct nla_policy ifa_mctp_policy[IFA_MAX + 1] = {
+ [IFA_ADDRESS] = { .type = NLA_U8 },
+ [IFA_LOCAL] = { .type = NLA_U8 },
+};
+
+static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[IFA_MAX + 1];
+ struct net_device *dev;
+ struct mctp_addr *addr;
+ struct mctp_dev *mdev;
+ struct ifaddrmsg *ifm;
+ unsigned long flags;
+ u8 *tmp_addrs;
+ int rc;
+
+ rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_mctp_policy,
+ extack);
+ if (rc < 0)
+ return rc;
+
+ ifm = nlmsg_data(nlh);
+
+ if (tb[IFA_LOCAL])
+ addr = nla_data(tb[IFA_LOCAL]);
+ else if (tb[IFA_ADDRESS])
+ addr = nla_data(tb[IFA_ADDRESS]);
+ else
+ return -EINVAL;
+
+ /* find device */
+ dev = __dev_get_by_index(net, ifm->ifa_index);
+ if (!dev)
+ return -ENODEV;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODEV;
+
+ if (!mctp_address_ok(addr->s_addr))
+ return -EINVAL;
+
+ /* Prevent duplicates. Under RTNL so don't need to lock for reading */
+ if (memchr(mdev->addrs, addr->s_addr, mdev->num_addrs))
+ return -EEXIST;
+
+ tmp_addrs = kmalloc(mdev->num_addrs + 1, GFP_KERNEL);
+ if (!tmp_addrs)
+ return -ENOMEM;
+ memcpy(tmp_addrs, mdev->addrs, mdev->num_addrs);
+ tmp_addrs[mdev->num_addrs] = addr->s_addr;
+
+ /* Lock to write */
+ spin_lock_irqsave(&mdev->addrs_lock, flags);
+ mdev->num_addrs++;
+ swap(mdev->addrs, tmp_addrs);
+ spin_unlock_irqrestore(&mdev->addrs_lock, flags);
+
+ kfree(tmp_addrs);
+
+ mctp_route_add_local(mdev, addr->s_addr);
+
+ return 0;
+}
+
+static int mctp_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[IFA_MAX + 1];
+ struct net_device *dev;
+ struct mctp_addr *addr;
+ struct mctp_dev *mdev;
+ struct ifaddrmsg *ifm;
+ unsigned long flags;
+ u8 *pos;
+ int rc;
+
+ rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_mctp_policy,
+ extack);
+ if (rc < 0)
+ return rc;
+
+ ifm = nlmsg_data(nlh);
+
+ if (tb[IFA_LOCAL])
+ addr = nla_data(tb[IFA_LOCAL]);
+ else if (tb[IFA_ADDRESS])
+ addr = nla_data(tb[IFA_ADDRESS]);
+ else
+ return -EINVAL;
+
+ /* find device */
+ dev = __dev_get_by_index(net, ifm->ifa_index);
+ if (!dev)
+ return -ENODEV;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODEV;
+
+ pos = memchr(mdev->addrs, addr->s_addr, mdev->num_addrs);
+ if (!pos)
+ return -ENOENT;
+
+ rc = mctp_route_remove_local(mdev, addr->s_addr);
+ // we can ignore -ENOENT in the case a route was already removed
+ if (rc < 0 && rc != -ENOENT)
+ return rc;
+
+ spin_lock_irqsave(&mdev->addrs_lock, flags);
+ memmove(pos, pos + 1, mdev->num_addrs - 1 - (pos - mdev->addrs));
+ mdev->num_addrs--;
+ spin_unlock_irqrestore(&mdev->addrs_lock, flags);
+
+ return 0;
+}
+
+static struct mctp_dev *mctp_add_dev(struct net_device *dev)
+{
+ struct mctp_dev *mdev;
+
+ ASSERT_RTNL();
+
+ mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
+ if (!mdev)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&mdev->addrs_lock);
+
+ mdev->net = mctp_default_net(dev_net(dev));
+
+ /* associate to net_device */
+ rcu_assign_pointer(dev->mctp_ptr, mdev);
+ dev_hold(dev);
+ mdev->dev = dev;
+
+ return mdev;
+}
+
+static int mctp_fill_link_af(struct sk_buff *skb,
+ const struct net_device *dev, u32 ext_filter_mask)
+{
+ struct mctp_dev *mdev;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODATA;
+ if (nla_put_u32(skb, IFLA_MCTP_NET, mdev->net))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static size_t mctp_get_link_af_size(const struct net_device *dev,
+ u32 ext_filter_mask)
+{
+ struct mctp_dev *mdev;
+ unsigned int ret;
+
+ /* caller holds RCU */
+ mdev = __mctp_dev_get(dev);
+ if (!mdev)
+ return 0;
+ ret = nla_total_size(4); /* IFLA_MCTP_NET */
+ return ret;
+}
+
+static const struct nla_policy ifla_af_mctp_policy[IFLA_MCTP_MAX + 1] = {
+ [IFLA_MCTP_NET] = { .type = NLA_U32 },
+};
+
+static int mctp_set_link_af(struct net_device *dev, const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_MCTP_MAX + 1];
+ struct mctp_dev *mdev;
+ int rc;
+
+ rc = nla_parse_nested(tb, IFLA_MCTP_MAX, attr, ifla_af_mctp_policy,
+ NULL);
+ if (rc)
+ return rc;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return 0;
+
+ if (tb[IFLA_MCTP_NET])
+ WRITE_ONCE(mdev->net, nla_get_u32(tb[IFLA_MCTP_NET]));
+
+ return 0;
+}
+
+static void mctp_unregister(struct net_device *dev)
+{
+ struct mctp_dev *mdev;
+
+ mdev = mctp_dev_get_rtnl(dev);
+
+ if (!mdev)
+ return;
+
+ RCU_INIT_POINTER(mdev->dev->mctp_ptr, NULL);
+
+ mctp_route_remove_dev(mdev);
+ mctp_neigh_remove_dev(mdev);
+ kfree(mdev->addrs);
+
+ mctp_dev_destroy(mdev);
+}
+
+static int mctp_register(struct net_device *dev)
+{
+ struct mctp_dev *mdev;
+
+ /* Already registered? */
+ if (rtnl_dereference(dev->mctp_ptr))
+ return 0;
+
+ /* only register specific types; MCTP-specific and loopback for now */
+ if (dev->type != ARPHRD_MCTP && dev->type != ARPHRD_LOOPBACK)
+ return 0;
+
+ mdev = mctp_add_dev(dev);
+ if (IS_ERR(mdev))
+ return PTR_ERR(mdev);
+
+ return 0;
+}
+
+static int mctp_dev_notify(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ int rc;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ rc = mctp_register(dev);
+ if (rc)
+ return notifier_from_errno(rc);
+ break;
+ case NETDEV_UNREGISTER:
+ mctp_unregister(dev);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct rtnl_af_ops mctp_af_ops = {
+ .family = AF_MCTP,
+ .fill_link_af = mctp_fill_link_af,
+ .get_link_af_size = mctp_get_link_af_size,
+ .set_link_af = mctp_set_link_af,
+};
+
+static struct notifier_block mctp_dev_nb = {
+ .notifier_call = mctp_dev_notify,
+ .priority = ADDRCONF_NOTIFY_PRIORITY,
+};
+
+void __init mctp_device_init(void)
+{
+ register_netdevice_notifier(&mctp_dev_nb);
+
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETADDR,
+ NULL, mctp_dump_addrinfo, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWADDR,
+ mctp_rtm_newaddr, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELADDR,
+ mctp_rtm_deladdr, NULL, 0);
+ rtnl_af_register(&mctp_af_ops);
+}
+
+void __exit mctp_device_exit(void)
+{
+ rtnl_af_unregister(&mctp_af_ops);
+ rtnl_unregister(PF_MCTP, RTM_DELADDR);
+ rtnl_unregister(PF_MCTP, RTM_NEWADDR);
+ rtnl_unregister(PF_MCTP, RTM_GETADDR);
+
+ unregister_netdevice_notifier(&mctp_dev_nb);
+}
diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c
new file mode 100644
index 000000000000..90ed2f02d1fb
--- /dev/null
+++ b/net/mctp/neigh.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - routing
+ * implementation.
+ *
+ * This is currently based on a simple routing table, with no dst cache. The
+ * number of routes should stay fairly small, so the lookup cost is small.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/idr.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+
+static int mctp_neigh_add(struct mctp_dev *mdev, mctp_eid_t eid,
+ enum mctp_neigh_source source,
+ size_t lladdr_len, const void *lladdr)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh;
+ int rc;
+
+ mutex_lock(&net->mctp.neigh_lock);
+ if (mctp_neigh_lookup(mdev, eid, NULL) == 0) {
+ rc = -EEXIST;
+ goto out;
+ }
+
+ if (lladdr_len > sizeof(neigh->ha)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ neigh = kzalloc(sizeof(*neigh), GFP_KERNEL);
+ if (!neigh) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ INIT_LIST_HEAD(&neigh->list);
+ neigh->dev = mdev;
+ dev_hold(neigh->dev->dev);
+ neigh->eid = eid;
+ neigh->source = source;
+ memcpy(neigh->ha, lladdr, lladdr_len);
+
+ list_add_rcu(&neigh->list, &net->mctp.neighbours);
+ rc = 0;
+out:
+ mutex_unlock(&net->mctp.neigh_lock);
+ return rc;
+}
+
+static void __mctp_neigh_free(struct rcu_head *rcu)
+{
+ struct mctp_neigh *neigh = container_of(rcu, struct mctp_neigh, rcu);
+
+ dev_put(neigh->dev->dev);
+ kfree(neigh);
+}
+
+/* Removes all neighbour entries referring to a device */
+void mctp_neigh_remove_dev(struct mctp_dev *mdev)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh, *tmp;
+
+ mutex_lock(&net->mctp.neigh_lock);
+ list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) {
+ if (neigh->dev == mdev) {
+ list_del_rcu(&neigh->list);
+ /* TODO: immediate RTM_DELNEIGH */
+ call_rcu(&neigh->rcu, __mctp_neigh_free);
+ }
+ }
+
+ mutex_unlock(&net->mctp.neigh_lock);
+}
+
+// TODO: add a "source" flag so netlink can only delete static neighbours?
+static int mctp_neigh_remove(struct mctp_dev *mdev, mctp_eid_t eid)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh, *tmp;
+ bool dropped = false;
+
+ mutex_lock(&net->mctp.neigh_lock);
+ list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) {
+ if (neigh->dev == mdev && neigh->eid == eid) {
+ list_del_rcu(&neigh->list);
+ /* TODO: immediate RTM_DELNEIGH */
+ call_rcu(&neigh->rcu, __mctp_neigh_free);
+ dropped = true;
+ }
+ }
+
+ mutex_unlock(&net->mctp.neigh_lock);
+ return dropped ? 0 : -ENOENT;
+}
+
+static const struct nla_policy nd_mctp_policy[NDA_MAX + 1] = {
+ [NDA_DST] = { .type = NLA_U8 },
+ [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+};
+
+static int mctp_rtm_newneigh(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ struct mctp_dev *mdev;
+ struct ndmsg *ndm;
+ struct nlattr *tb[NDA_MAX + 1];
+ int rc;
+ mctp_eid_t eid;
+ void *lladdr;
+ int lladdr_len;
+
+ rc = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nd_mctp_policy,
+ extack);
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack, "lladdr too large?");
+ return rc;
+ }
+
+ if (!tb[NDA_DST]) {
+ NL_SET_ERR_MSG(extack, "Neighbour EID must be specified");
+ return -EINVAL;
+ }
+
+ if (!tb[NDA_LLADDR]) {
+ NL_SET_ERR_MSG(extack, "Neighbour lladdr must be specified");
+ return -EINVAL;
+ }
+
+ eid = nla_get_u8(tb[NDA_DST]);
+ if (!mctp_address_ok(eid)) {
+ NL_SET_ERR_MSG(extack, "Invalid neighbour EID");
+ return -EINVAL;
+ }
+
+ lladdr = nla_data(tb[NDA_LLADDR]);
+ lladdr_len = nla_len(tb[NDA_LLADDR]);
+
+ ndm = nlmsg_data(nlh);
+
+ dev = __dev_get_by_index(net, ndm->ndm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODEV;
+
+ if (lladdr_len != dev->addr_len) {
+ NL_SET_ERR_MSG(extack, "Wrong lladdr length");
+ return -EINVAL;
+ }
+
+ return mctp_neigh_add(mdev, eid, MCTP_NEIGH_STATIC,
+ lladdr_len, lladdr);
+}
+
+static int mctp_rtm_delneigh(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[NDA_MAX + 1];
+ struct net_device *dev;
+ struct mctp_dev *mdev;
+ struct ndmsg *ndm;
+ int rc;
+ mctp_eid_t eid;
+
+ rc = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nd_mctp_policy,
+ extack);
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack, "incorrect format");
+ return rc;
+ }
+
+ if (!tb[NDA_DST]) {
+ NL_SET_ERR_MSG(extack, "Neighbour EID must be specified");
+ return -EINVAL;
+ }
+ eid = nla_get_u8(tb[NDA_DST]);
+
+ ndm = nlmsg_data(nlh);
+ dev = __dev_get_by_index(net, ndm->ndm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODEV;
+
+ return mctp_neigh_remove(mdev, eid);
+}
+
+static int mctp_fill_neigh(struct sk_buff *skb, u32 portid, u32 seq, int event,
+ unsigned int flags, struct mctp_neigh *neigh)
+{
+ struct net_device *dev = neigh->dev->dev;
+ struct nlmsghdr *nlh;
+ struct ndmsg *hdr;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ hdr = nlmsg_data(nlh);
+ hdr->ndm_family = AF_MCTP;
+ hdr->ndm_ifindex = dev->ifindex;
+ hdr->ndm_state = 0; // TODO other state bits?
+ if (neigh->source == MCTP_NEIGH_STATIC)
+ hdr->ndm_state |= NUD_PERMANENT;
+ hdr->ndm_flags = 0;
+ hdr->ndm_type = RTN_UNICAST; // TODO: is loopback RTN_LOCAL?
+
+ if (nla_put_u8(skb, NDA_DST, neigh->eid))
+ goto cancel;
+
+ if (nla_put(skb, NDA_LLADDR, dev->addr_len, neigh->ha))
+ goto cancel;
+
+ nlmsg_end(skb, nlh);
+
+ return 0;
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int mctp_rtm_getneigh(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ int rc, idx, req_ifindex;
+ struct mctp_neigh *neigh;
+ struct ndmsg *ndmsg;
+ struct {
+ int idx;
+ } *cbctx = (void *)cb->ctx;
+
+ ndmsg = nlmsg_data(cb->nlh);
+ req_ifindex = ndmsg->ndm_ifindex;
+
+ idx = 0;
+ rcu_read_lock();
+ list_for_each_entry_rcu(neigh, &net->mctp.neighbours, list) {
+ if (idx < cbctx->idx)
+ goto cont;
+
+ rc = 0;
+ if (req_ifindex == 0 || req_ifindex == neigh->dev->dev->ifindex)
+ rc = mctp_fill_neigh(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH, NLM_F_MULTI, neigh);
+
+ if (rc)
+ break;
+cont:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ cbctx->idx = idx;
+ return skb->len;
+}
+
+int mctp_neigh_lookup(struct mctp_dev *mdev, mctp_eid_t eid, void *ret_hwaddr)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh;
+ int rc = -EHOSTUNREACH; // TODO: or ENOENT?
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(neigh, &net->mctp.neighbours, list) {
+ if (mdev == neigh->dev && eid == neigh->eid) {
+ if (ret_hwaddr)
+ memcpy(ret_hwaddr, neigh->ha,
+ sizeof(neigh->ha));
+ rc = 0;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return rc;
+}
+
+/* namespace registration */
+static int __net_init mctp_neigh_net_init(struct net *net)
+{
+ struct netns_mctp *ns = &net->mctp;
+
+ INIT_LIST_HEAD(&ns->neighbours);
+ mutex_init(&ns->neigh_lock);
+ return 0;
+}
+
+static void __net_exit mctp_neigh_net_exit(struct net *net)
+{
+ struct netns_mctp *ns = &net->mctp;
+ struct mctp_neigh *neigh;
+
+ list_for_each_entry(neigh, &ns->neighbours, list)
+ call_rcu(&neigh->rcu, __mctp_neigh_free);
+}
+
+/* net namespace implementation */
+
+static struct pernet_operations mctp_net_ops = {
+ .init = mctp_neigh_net_init,
+ .exit = mctp_neigh_net_exit,
+};
+
+int __init mctp_neigh_init(void)
+{
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWNEIGH,
+ mctp_rtm_newneigh, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELNEIGH,
+ mctp_rtm_delneigh, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETNEIGH,
+ NULL, mctp_rtm_getneigh, 0);
+
+ return register_pernet_subsys(&mctp_net_ops);
+}
+
+void __exit mctp_neigh_exit(void)
+{
+ unregister_pernet_subsys(&mctp_net_ops);
+ rtnl_unregister(PF_MCTP, RTM_GETNEIGH);
+ rtnl_unregister(PF_MCTP, RTM_DELNEIGH);
+ rtnl_unregister(PF_MCTP, RTM_NEWNEIGH);
+}
diff --git a/net/mctp/route.c b/net/mctp/route.c
new file mode 100644
index 000000000000..5265525011ad
--- /dev/null
+++ b/net/mctp/route.c
@@ -0,0 +1,1116 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - routing
+ * implementation.
+ *
+ * This is currently based on a simple routing table, with no dst cache. The
+ * number of routes should stay fairly small, so the lookup cost is small.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/idr.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+
+#include <uapi/linux/if_arp.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+
+static const unsigned int mctp_message_maxlen = 64 * 1024;
+
+/* route output callbacks */
+static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return 0;
+}
+
+static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
+{
+ struct mctp_skb_cb *cb = mctp_cb(skb);
+ struct mctp_hdr *mh;
+ struct sock *sk;
+ u8 type;
+
+ WARN_ON(!rcu_read_lock_held());
+
+ /* TODO: look up in skb->cb? */
+ mh = mctp_hdr(skb);
+
+ if (!skb_headlen(skb))
+ return NULL;
+
+ type = (*(u8 *)skb->data) & 0x7f;
+
+ sk_for_each_rcu(sk, &net->mctp.binds) {
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+
+ if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
+ continue;
+
+ if (msk->bind_type != type)
+ continue;
+
+ if (msk->bind_addr != MCTP_ADDR_ANY &&
+ msk->bind_addr != mh->dest)
+ continue;
+
+ return msk;
+ }
+
+ return NULL;
+}
+
+static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
+ mctp_eid_t peer, u8 tag)
+{
+ if (key->local_addr != local)
+ return false;
+
+ if (key->peer_addr != peer)
+ return false;
+
+ if (key->tag != tag)
+ return false;
+
+ return true;
+}
+
+static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
+ mctp_eid_t peer)
+{
+ struct mctp_sk_key *key, *ret;
+ struct mctp_hdr *mh;
+ u8 tag;
+
+ WARN_ON(!rcu_read_lock_held());
+
+ mh = mctp_hdr(skb);
+ tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+
+ ret = NULL;
+
+ hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) {
+ if (mctp_key_match(key, mh->dest, peer, tag)) {
+ ret = key;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
+ mctp_eid_t local, mctp_eid_t peer,
+ u8 tag, gfp_t gfp)
+{
+ struct mctp_sk_key *key;
+
+ key = kzalloc(sizeof(*key), gfp);
+ if (!key)
+ return NULL;
+
+ key->peer_addr = peer;
+ key->local_addr = local;
+ key->tag = tag;
+ key->sk = &msk->sk;
+ spin_lock_init(&key->reasm_lock);
+
+ return key;
+}
+
+static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct mctp_sk_key *tmp;
+ unsigned long flags;
+ int rc = 0;
+
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+
+ hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
+ if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
+ key->tag)) {
+ rc = -EEXIST;
+ break;
+ }
+ }
+
+ if (!rc) {
+ hlist_add_head(&key->hlist, &net->mctp.keys);
+ hlist_add_head(&key->sklist, &msk->keys);
+ }
+
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ return rc;
+}
+
+/* Must be called with key->reasm_lock, which it will release. Will schedule
+ * the key for an RCU free.
+ */
+static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
+ unsigned long flags)
+ __releases(&key->reasm_lock)
+{
+ struct sk_buff *skb;
+
+ skb = key->reasm_head;
+ key->reasm_head = NULL;
+ key->reasm_dead = true;
+ spin_unlock_irqrestore(&key->reasm_lock, flags);
+
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ hlist_del_rcu(&key->hlist);
+ hlist_del_rcu(&key->sklist);
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+ kfree_rcu(key, rcu);
+
+ if (skb)
+ kfree_skb(skb);
+}
+
+static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
+{
+ struct mctp_hdr *hdr = mctp_hdr(skb);
+ u8 exp_seq, this_seq;
+
+ this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
+ & MCTP_HDR_SEQ_MASK;
+
+ if (!key->reasm_head) {
+ key->reasm_head = skb;
+ key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
+ key->last_seq = this_seq;
+ return 0;
+ }
+
+ exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;
+
+ if (this_seq != exp_seq)
+ return -EINVAL;
+
+ if (key->reasm_head->len + skb->len > mctp_message_maxlen)
+ return -EINVAL;
+
+ skb->next = NULL;
+ skb->sk = NULL;
+ *key->reasm_tailp = skb;
+ key->reasm_tailp = &skb->next;
+
+ key->last_seq = this_seq;
+
+ key->reasm_head->data_len += skb->len;
+ key->reasm_head->len += skb->len;
+ key->reasm_head->truesize += skb->truesize;
+
+ return 0;
+}
+
+static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
+{
+ struct net *net = dev_net(skb->dev);
+ struct mctp_sk_key *key;
+ struct mctp_sock *msk;
+ struct mctp_hdr *mh;
+ unsigned long f;
+ u8 tag, flags;
+ int rc;
+
+ msk = NULL;
+ rc = -EINVAL;
+
+ /* we may be receiving a locally-routed packet; drop source sk
+ * accounting
+ */
+ skb_orphan(skb);
+
+ /* ensure we have enough data for a header and a type */
+ if (skb->len < sizeof(struct mctp_hdr) + 1)
+ goto out;
+
+ /* grab header, advance data ptr */
+ mh = mctp_hdr(skb);
+ skb_pull(skb, sizeof(struct mctp_hdr));
+
+ if (mh->ver != 1)
+ goto out;
+
+ flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
+ tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+
+ rcu_read_lock();
+
+ /* lookup socket / reasm context, exactly matching (src,dest,tag) */
+ key = mctp_lookup_key(net, skb, mh->src);
+
+ if (flags & MCTP_HDR_FLAG_SOM) {
+ if (key) {
+ msk = container_of(key->sk, struct mctp_sock, sk);
+ } else {
+ /* first response to a broadcast? do a more general
+ * key lookup to find the socket, but don't use this
+ * key for reassembly - we'll create a more specific
+ * one for future packets if required (ie, !EOM).
+ */
+ key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY);
+ if (key) {
+ msk = container_of(key->sk,
+ struct mctp_sock, sk);
+ key = NULL;
+ }
+ }
+
+ if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
+ msk = mctp_lookup_bind(net, skb);
+
+ if (!msk) {
+ rc = -ENOENT;
+ goto out_unlock;
+ }
+
+ /* single-packet message? deliver to socket, clean up any
+ * pending key.
+ */
+ if (flags & MCTP_HDR_FLAG_EOM) {
+ sock_queue_rcv_skb(&msk->sk, skb);
+ if (key) {
+ spin_lock_irqsave(&key->reasm_lock, f);
+ /* we've hit a pending reassembly; not much we
+ * can do but drop it
+ */
+ __mctp_key_unlock_drop(key, net, f);
+ }
+ rc = 0;
+ goto out_unlock;
+ }
+
+ /* broadcast response or a bind() - create a key for further
+ * packets for this message
+ */
+ if (!key) {
+ key = mctp_key_alloc(msk, mh->dest, mh->src,
+ tag, GFP_ATOMIC);
+ if (!key) {
+ rc = -ENOMEM;
+ goto out_unlock;
+ }
+
+ /* we can queue without the reasm lock here, as the
+ * key isn't observable yet
+ */
+ mctp_frag_queue(key, skb);
+
+ /* if the key_add fails, we've raced with another
+ * SOM packet with the same src, dest and tag. There's
+ * no way to distinguish future packets, so all we
+ * can do is drop; we'll free the skb on exit from
+ * this function.
+ */
+ rc = mctp_key_add(key, msk);
+ if (rc)
+ kfree(key);
+
+ } else {
+ /* existing key: start reassembly */
+ spin_lock_irqsave(&key->reasm_lock, f);
+
+ if (key->reasm_head || key->reasm_dead) {
+ /* duplicate start? drop everything */
+ __mctp_key_unlock_drop(key, net, f);
+ rc = -EEXIST;
+ } else {
+ rc = mctp_frag_queue(key, skb);
+ spin_unlock_irqrestore(&key->reasm_lock, f);
+ }
+ }
+
+ } else if (key) {
+ /* this packet continues a previous message; reassemble
+ * using the message-specific key
+ */
+
+ spin_lock_irqsave(&key->reasm_lock, f);
+
+ /* we need to be continuing an existing reassembly... */
+ if (!key->reasm_head)
+ rc = -EINVAL;
+ else
+ rc = mctp_frag_queue(key, skb);
+
+ /* end of message? deliver to socket, and we're done with
+ * the reassembly/response key
+ */
+ if (!rc && flags & MCTP_HDR_FLAG_EOM) {
+ sock_queue_rcv_skb(key->sk, key->reasm_head);
+ key->reasm_head = NULL;
+ __mctp_key_unlock_drop(key, net, f);
+ } else {
+ spin_unlock_irqrestore(&key->reasm_lock, f);
+ }
+
+ } else {
+ /* not a start, no matching key */
+ rc = -ENOENT;
+ }
+
+out_unlock:
+ rcu_read_unlock();
+out:
+ if (rc)
+ kfree_skb(skb);
+ return rc;
+}
+
+static unsigned int mctp_route_mtu(struct mctp_route *rt)
+{
+ return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
+}
+
+static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
+{
+ struct mctp_hdr *hdr = mctp_hdr(skb);
+ char daddr_buf[MAX_ADDR_LEN];
+ char *daddr = NULL;
+ unsigned int mtu;
+ int rc;
+
+ skb->protocol = htons(ETH_P_MCTP);
+
+ mtu = READ_ONCE(skb->dev->mtu);
+ if (skb->len > mtu) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ /* If lookup fails let the device handle daddr==NULL */
+ if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
+ daddr = daddr_buf;
+
+ rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
+ daddr, skb->dev->dev_addr, skb->len);
+ if (rc) {
+ kfree_skb(skb);
+ return -EHOSTUNREACH;
+ }
+
+ rc = dev_queue_xmit(skb);
+ if (rc)
+ rc = net_xmit_errno(rc);
+
+ return rc;
+}
+
+/* route alloc/release */
+static void mctp_route_release(struct mctp_route *rt)
+{
+ if (refcount_dec_and_test(&rt->refs)) {
+ dev_put(rt->dev->dev);
+ kfree_rcu(rt, rcu);
+ }
+}
+
+/* returns a route with the refcount at 1 */
+static struct mctp_route *mctp_route_alloc(void)
+{
+ struct mctp_route *rt;
+
+ rt = kzalloc(sizeof(*rt), GFP_KERNEL);
+ if (!rt)
+ return NULL;
+
+ INIT_LIST_HEAD(&rt->list);
+ refcount_set(&rt->refs, 1);
+ rt->output = mctp_route_discard;
+
+ return rt;
+}
+
+unsigned int mctp_default_net(struct net *net)
+{
+ return READ_ONCE(net->mctp.default_net);
+}
+
+int mctp_default_net_set(struct net *net, unsigned int index)
+{
+ if (index == 0)
+ return -EINVAL;
+ WRITE_ONCE(net->mctp.default_net, index);
+ return 0;
+}
+
+/* tag management */
+static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
+ struct mctp_sock *msk)
+{
+ struct netns_mctp *mns = &net->mctp;
+
+ lockdep_assert_held(&mns->keys_lock);
+
+ /* we hold the net->key_lock here, allowing updates to both
+ * then net and sk
+ */
+ hlist_add_head_rcu(&key->hlist, &mns->keys);
+ hlist_add_head_rcu(&key->sklist, &msk->keys);
+}
+
+/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
+ * it for the socket msk
+ */
+static int mctp_alloc_local_tag(struct mctp_sock *msk,
+ mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct netns_mctp *mns = &net->mctp;
+ struct mctp_sk_key *key, *tmp;
+ unsigned long flags;
+ int rc = -EAGAIN;
+ u8 tagbits;
+
+ /* be optimistic, alloc now */
+ key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
+ if (!key)
+ return -ENOMEM;
+
+ /* 8 possible tag values */
+ tagbits = 0xff;
+
+ spin_lock_irqsave(&mns->keys_lock, flags);
+
+ /* Walk through the existing keys, looking for potential conflicting
+ * tags. If we find a conflict, clear that bit from tagbits
+ */
+ hlist_for_each_entry(tmp, &mns->keys, hlist) {
+ /* if we don't own the tag, it can't conflict */
+ if (tmp->tag & MCTP_HDR_FLAG_TO)
+ continue;
+
+ if ((tmp->peer_addr == daddr ||
+ tmp->peer_addr == MCTP_ADDR_ANY) &&
+ tmp->local_addr == saddr)
+ tagbits &= ~(1 << tmp->tag);
+
+ if (!tagbits)
+ break;
+ }
+
+ if (tagbits) {
+ key->tag = __ffs(tagbits);
+ mctp_reserve_tag(net, key, msk);
+ *tagp = key->tag;
+ rc = 0;
+ }
+
+ spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+ if (!tagbits)
+ kfree(key);
+
+ return rc;
+}
+
+/* routing lookups */
+static bool mctp_rt_match_eid(struct mctp_route *rt,
+ unsigned int net, mctp_eid_t eid)
+{
+ return READ_ONCE(rt->dev->net) == net &&
+ rt->min <= eid && rt->max >= eid;
+}
+
+/* compares match, used for duplicate prevention */
+static bool mctp_rt_compare_exact(struct mctp_route *rt1,
+ struct mctp_route *rt2)
+{
+ ASSERT_RTNL();
+ return rt1->dev->net == rt2->dev->net &&
+ rt1->min == rt2->min &&
+ rt1->max == rt2->max;
+}
+
+struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
+ mctp_eid_t daddr)
+{
+ struct mctp_route *tmp, *rt = NULL;
+
+ list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
+ /* TODO: add metrics */
+ if (mctp_rt_match_eid(tmp, dnet, daddr)) {
+ if (refcount_inc_not_zero(&tmp->refs)) {
+ rt = tmp;
+ break;
+ }
+ }
+ }
+
+ return rt;
+}
+
+/* sends a skb to rt and releases the route. */
+int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
+{
+ int rc;
+
+ rc = rt->output(rt, skb);
+ mctp_route_release(rt);
+ return rc;
+}
+
+static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
+ unsigned int mtu, u8 tag)
+{
+ const unsigned int hlen = sizeof(struct mctp_hdr);
+ struct mctp_hdr *hdr, *hdr2;
+ unsigned int pos, size;
+ struct sk_buff *skb2;
+ int rc;
+ u8 seq;
+
+ hdr = mctp_hdr(skb);
+ seq = 0;
+ rc = 0;
+
+ if (mtu < hlen + 1) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ /* we've got the header */
+ skb_pull(skb, hlen);
+
+ for (pos = 0; pos < skb->len;) {
+ /* size of message payload */
+ size = min(mtu - hlen, skb->len - pos);
+
+ skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL);
+ if (!skb2) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ /* generic skb copy */
+ skb2->protocol = skb->protocol;
+ skb2->priority = skb->priority;
+ skb2->dev = skb->dev;
+ memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));
+
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
+
+ /* establish packet */
+ skb_reserve(skb2, MCTP_HEADER_MAXLEN);
+ skb_reset_network_header(skb2);
+ skb_put(skb2, hlen + size);
+ skb2->transport_header = skb2->network_header + hlen;
+
+ /* copy header fields, calculate SOM/EOM flags & seq */
+ hdr2 = mctp_hdr(skb2);
+ hdr2->ver = hdr->ver;
+ hdr2->dest = hdr->dest;
+ hdr2->src = hdr->src;
+ hdr2->flags_seq_tag = tag &
+ (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+
+ if (pos == 0)
+ hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;
+
+ if (pos + size == skb->len)
+ hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;
+
+ hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;
+
+ /* copy message payload */
+ skb_copy_bits(skb, pos, skb_transport_header(skb2), size);
+
+ /* do route, but don't drop the rt reference */
+ rc = rt->output(rt, skb2);
+ if (rc)
+ break;
+
+ seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
+ pos += size;
+ }
+
+ mctp_route_release(rt);
+ consume_skb(skb);
+ return rc;
+}
+
+int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+ struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
+{
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+ struct mctp_skb_cb *cb = mctp_cb(skb);
+ struct mctp_hdr *hdr;
+ unsigned long flags;
+ unsigned int mtu;
+ mctp_eid_t saddr;
+ int rc;
+ u8 tag;
+
+ if (WARN_ON(!rt->dev))
+ return -EINVAL;
+
+ spin_lock_irqsave(&rt->dev->addrs_lock, flags);
+ if (rt->dev->num_addrs == 0) {
+ rc = -EHOSTUNREACH;
+ } else {
+ /* use the outbound interface's first address as our source */
+ saddr = rt->dev->addrs[0];
+ rc = 0;
+ }
+ spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
+
+ if (rc)
+ return rc;
+
+ if (req_tag & MCTP_HDR_FLAG_TO) {
+ rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
+ if (rc)
+ return rc;
+ tag |= MCTP_HDR_FLAG_TO;
+ } else {
+ tag = req_tag;
+ }
+
+
+ skb->protocol = htons(ETH_P_MCTP);
+ skb->priority = 0;
+ skb_reset_transport_header(skb);
+ skb_push(skb, sizeof(struct mctp_hdr));
+ skb_reset_network_header(skb);
+ skb->dev = rt->dev->dev;
+
+ /* cb->net will have been set on initial ingress */
+ cb->src = saddr;
+
+ /* set up common header fields */
+ hdr = mctp_hdr(skb);
+ hdr->ver = 1;
+ hdr->dest = daddr;
+ hdr->src = saddr;
+
+ mtu = mctp_route_mtu(rt);
+
+ if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
+ hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
+ tag;
+ return mctp_do_route(rt, skb);
+ } else {
+ return mctp_do_fragment_route(rt, skb, mtu, tag);
+ }
+}
+
+/* route management */
+static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+ unsigned int daddr_extent, unsigned int mtu,
+ unsigned char type)
+{
+ int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_route *rt, *ert;
+
+ if (!mctp_address_ok(daddr_start))
+ return -EINVAL;
+
+ if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
+ return -EINVAL;
+
+ switch (type) {
+ case RTN_LOCAL:
+ rtfn = mctp_route_input;
+ break;
+ case RTN_UNICAST:
+ rtfn = mctp_route_output;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ rt = mctp_route_alloc();
+ if (!rt)
+ return -ENOMEM;
+
+ rt->min = daddr_start;
+ rt->max = daddr_start + daddr_extent;
+ rt->mtu = mtu;
+ rt->dev = mdev;
+ dev_hold(rt->dev->dev);
+ rt->type = type;
+ rt->output = rtfn;
+
+ ASSERT_RTNL();
+ /* Prevent duplicate identical routes. */
+ list_for_each_entry(ert, &net->mctp.routes, list) {
+ if (mctp_rt_compare_exact(rt, ert)) {
+ mctp_route_release(rt);
+ return -EEXIST;
+ }
+ }
+
+ list_add_rcu(&rt->list, &net->mctp.routes);
+
+ return 0;
+}
+
+static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+ unsigned int daddr_extent)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_route *rt, *tmp;
+ mctp_eid_t daddr_end;
+ bool dropped;
+
+ if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
+ return -EINVAL;
+
+ daddr_end = daddr_start + daddr_extent;
+ dropped = false;
+
+ ASSERT_RTNL();
+
+ list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
+ if (rt->dev == mdev &&
+ rt->min == daddr_start && rt->max == daddr_end) {
+ list_del_rcu(&rt->list);
+ /* TODO: immediate RTM_DELROUTE */
+ mctp_route_release(rt);
+ dropped = true;
+ }
+ }
+
+ return dropped ? 0 : -ENOENT;
+}
+
+int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
+{
+ return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
+}
+
+int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
+{
+ return mctp_route_remove(mdev, addr, 0);
+}
+
+/* removes all entries for a given device */
+void mctp_route_remove_dev(struct mctp_dev *mdev)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_route *rt, *tmp;
+
+ ASSERT_RTNL();
+ list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
+ if (rt->dev == mdev) {
+ list_del_rcu(&rt->list);
+ /* TODO: immediate RTM_DELROUTE */
+ mctp_route_release(rt);
+ }
+ }
+}
+
+/* Incoming packet-handling */
+
+static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
+{
+ struct net *net = dev_net(dev);
+ struct mctp_skb_cb *cb;
+ struct mctp_route *rt;
+ struct mctp_hdr *mh;
+
+ /* basic non-data sanity checks */
+ if (dev->type != ARPHRD_MCTP)
+ goto err_drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
+ goto err_drop;
+
+ skb_reset_transport_header(skb);
+ skb_reset_network_header(skb);
+
+ /* We have enough for a header; decode and route */
+ mh = mctp_hdr(skb);
+ if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
+ goto err_drop;
+
+ cb = __mctp_cb(skb);
+ rcu_read_lock();
+ cb->net = READ_ONCE(__mctp_dev_get(dev)->net);
+ rcu_read_unlock();
+
+ rt = mctp_route_lookup(net, cb->net, mh->dest);
+ if (!rt)
+ goto err_drop;
+
+ mctp_do_route(rt, skb);
+
+ return NET_RX_SUCCESS;
+
+err_drop:
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+
+static struct packet_type mctp_packet_type = {
+ .type = cpu_to_be16(ETH_P_MCTP),
+ .func = mctp_pkttype_receive,
+};
+
+/* netlink interface */
+
+static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
+ [RTA_DST] = { .type = NLA_U8 },
+ [RTA_METRICS] = { .type = NLA_NESTED },
+ [RTA_OIF] = { .type = NLA_U32 },
+};
+
+/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
+ * tb must hold RTA_MAX+1 elements.
+ */
+static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ struct nlattr **tb, struct rtmsg **rtm,
+ struct mctp_dev **mdev, mctp_eid_t *daddr_start)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ unsigned int ifindex;
+ int rc;
+
+ rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
+ rta_mctp_policy, extack);
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack, "incorrect format");
+ return rc;
+ }
+
+ if (!tb[RTA_DST]) {
+ NL_SET_ERR_MSG(extack, "dst EID missing");
+ return -EINVAL;
+ }
+ *daddr_start = nla_get_u8(tb[RTA_DST]);
+
+ if (!tb[RTA_OIF]) {
+ NL_SET_ERR_MSG(extack, "ifindex missing");
+ return -EINVAL;
+ }
+ ifindex = nla_get_u32(tb[RTA_OIF]);
+
+ *rtm = nlmsg_data(nlh);
+ if ((*rtm)->rtm_family != AF_MCTP) {
+ NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
+ return -EINVAL;
+ }
+
+ dev = __dev_get_by_index(net, ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "bad ifindex");
+ return -ENODEV;
+ }
+ *mdev = mctp_dev_get_rtnl(dev);
+ if (!*mdev)
+ return -ENODEV;
+
+ if (dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack, "no routes to loopback");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RTA_MAX + 1];
+ mctp_eid_t daddr_start;
+ struct mctp_dev *mdev;
+ struct rtmsg *rtm;
+ unsigned int mtu;
+ int rc;
+
+ rc = mctp_route_nlparse(skb, nlh, extack, tb,
+ &rtm, &mdev, &daddr_start);
+ if (rc < 0)
+ return rc;
+
+ if (rtm->rtm_type != RTN_UNICAST) {
+ NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
+ return -EINVAL;
+ }
+
+ /* TODO: parse mtu from nlparse */
+ mtu = 0;
+
+ if (rtm->rtm_type != RTN_UNICAST)
+ return -EINVAL;
+
+ rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
+ rtm->rtm_type);
+ return rc;
+}
+
+static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RTA_MAX + 1];
+ mctp_eid_t daddr_start;
+ struct mctp_dev *mdev;
+ struct rtmsg *rtm;
+ int rc;
+
+ rc = mctp_route_nlparse(skb, nlh, extack, tb,
+ &rtm, &mdev, &daddr_start);
+ if (rc < 0)
+ return rc;
+
+ /* we only have unicast routes */
+ if (rtm->rtm_type != RTN_UNICAST)
+ return -EINVAL;
+
+ rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len);
+ return rc;
+}
+
+static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
+ u32 portid, u32 seq, int event, unsigned int flags)
+{
+ struct nlmsghdr *nlh;
+ struct rtmsg *hdr;
+ void *metrics;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ hdr = nlmsg_data(nlh);
+ hdr->rtm_family = AF_MCTP;
+
+ /* we use the _len fields as a number of EIDs, rather than
+ * a number of bits in the address
+ */
+ hdr->rtm_dst_len = rt->max - rt->min;
+ hdr->rtm_src_len = 0;
+ hdr->rtm_tos = 0;
+ hdr->rtm_table = RT_TABLE_DEFAULT;
+ hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
+ hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
+ hdr->rtm_type = rt->type;
+
+ if (nla_put_u8(skb, RTA_DST, rt->min))
+ goto cancel;
+
+ metrics = nla_nest_start_noflag(skb, RTA_METRICS);
+ if (!metrics)
+ goto cancel;
+
+ if (rt->mtu) {
+ if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
+ goto cancel;
+ }
+
+ nla_nest_end(skb, metrics);
+
+ if (rt->dev) {
+ if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
+ goto cancel;
+ }
+
+ /* TODO: conditional neighbour physaddr? */
+
+ nlmsg_end(skb, nlh);
+
+ return 0;
+
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct mctp_route *rt;
+ int s_idx, idx;
+
+ /* TODO: allow filtering on route data, possibly under
+ * cb->strict_check
+ */
+
+ /* TODO: change to struct overlay */
+ s_idx = cb->args[0];
+ idx = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
+ if (idx++ < s_idx)
+ continue;
+ if (mctp_fill_rtinfo(skb, rt,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0)
+ break;
+ }
+
+ rcu_read_unlock();
+ cb->args[0] = idx;
+
+ return skb->len;
+}
+
+/* net namespace implementation */
+static int __net_init mctp_routes_net_init(struct net *net)
+{
+ struct netns_mctp *ns = &net->mctp;
+
+ INIT_LIST_HEAD(&ns->routes);
+ INIT_HLIST_HEAD(&ns->binds);
+ mutex_init(&ns->bind_lock);
+ INIT_HLIST_HEAD(&ns->keys);
+ spin_lock_init(&ns->keys_lock);
+ WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
+ return 0;
+}
+
+static void __net_exit mctp_routes_net_exit(struct net *net)
+{
+ struct mctp_route *rt;
+
+ list_for_each_entry_rcu(rt, &net->mctp.routes, list)
+ mctp_route_release(rt);
+}
+
+static struct pernet_operations mctp_net_ops = {
+ .init = mctp_routes_net_init,
+ .exit = mctp_routes_net_exit,
+};
+
+int __init mctp_routes_init(void)
+{
+ dev_add_pack(&mctp_packet_type);
+
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
+ NULL, mctp_dump_rtinfo, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
+ mctp_newroute, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
+ mctp_delroute, NULL, 0);
+
+ return register_pernet_subsys(&mctp_net_ops);
+}
+
+void __exit mctp_routes_exit(void)
+{
+ unregister_pernet_subsys(&mctp_net_ops);
+ rtnl_unregister(PF_MCTP, RTM_DELROUTE);
+ rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
+ rtnl_unregister(PF_MCTP, RTM_GETROUTE);
+ dev_remove_pack(&mctp_packet_type);
+}