From 9354d452034273a50a4fd703bea31e5d6b1fc20b Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 2 Nov 2017 17:04:37 -0200 Subject: openvswitch: reliable interface indentification in port dumps This patch allows reliable identification of netdevice interfaces connected to openvswitch bridges. In particular, user space queries the netdev interfaces belonging to the ports for statistics, up/down state, etc. Datapath dump needs to provide enough information for the user space to be able to do that. Currently, only interface names are returned. This is not sufficient, as openvswitch allows its ports to be in different name spaces and the interface name is valid only in its name space. What is needed and generally used in other netlink APIs, is the pair ifindex+netnsid. The solution is addition of the ifindex+netnsid pair (or only ifindex if in the same name space) to vport get/dump operation. On request side, ideally the ifindex+netnsid pair could be used to get/set/del the corresponding vport. This is not implemented by this patch and can be added later if needed. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 47 +++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) (limited to 'net/openvswitch/datapath.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index c3aec6227c91..4d38ac044cee 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1848,7 +1848,8 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = { /* Called with ovs_mutex or RCU read lock. */ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, - u32 portid, u32 seq, u32 flags, u8 cmd) + struct net *net, u32 portid, u32 seq, + u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; @@ -1864,9 +1865,17 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || nla_put_string(skb, OVS_VPORT_ATTR_NAME, - ovs_vport_name(vport))) + ovs_vport_name(vport)) || + nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex)) goto nla_put_failure; + if (!net_eq(net, dev_net(vport->dev))) { + int id = peernet2id_alloc(net, dev_net(vport->dev)); + + if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id)) + goto nla_put_failure; + } + ovs_vport_get_stats(vport, &vport_stats); if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats), &vport_stats, @@ -1896,8 +1905,8 @@ static struct sk_buff *ovs_vport_cmd_alloc_info(void) } /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ -struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, - u32 seq, u8 cmd) +struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, + u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; int retval; @@ -1906,7 +1915,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); + retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd); BUG_ON(retval < 0); return skb; @@ -1920,6 +1929,8 @@ static struct vport *lookup_vport(struct net *net, struct datapath *dp; struct vport *vport; + if (a[OVS_VPORT_ATTR_IFINDEX]) + return ERR_PTR(-EOPNOTSUPP); if (a[OVS_VPORT_ATTR_NAME]) { vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); if (!vport) @@ -1944,6 +1955,7 @@ static struct vport *lookup_vport(struct net *net, return vport; } else return ERR_PTR(-EINVAL); + } /* Called with ovs_mutex */ @@ -1983,6 +1995,8 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || !a[OVS_VPORT_ATTR_UPCALL_PID]) return -EINVAL; + if (a[OVS_VPORT_ATTR_IFINDEX]) + return -EOPNOTSUPP; port_no = a[OVS_VPORT_ATTR_PORT_NO] ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; @@ -2032,8 +2046,9 @@ restart: goto exit_unlock_free; } - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_NEW); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_NEW); if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom) update_headroom(dp); @@ -2090,8 +2105,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock_free; } - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_NEW); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_NEW); BUG_ON(err < 0); ovs_unlock(); @@ -2128,8 +2144,9 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) goto exit_unlock_free; } - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_DEL); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_DEL); BUG_ON(err < 0); /* the vport deletion may trigger dp headroom update */ @@ -2169,8 +2186,9 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock_free; - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_NEW); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_NEW); BUG_ON(err < 0); rcu_read_unlock(); @@ -2202,6 +2220,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { if (j >= skip && ovs_vport_cmd_fill_info(vport, skb, + sock_net(skb->sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, @@ -2228,6 +2247,8 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, + [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 }, }; static const struct genl_ops dp_vport_genl_ops[] = { -- cgit v1.2.3-70-g09d2 From 9602c01e57f7b868d748c2ba2aef0efa64b71ffc Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Fri, 10 Nov 2017 12:09:41 -0800 Subject: openvswitch: export get_dp() API. Later patches will invoke get_dp() outside of datapath.c. Export it. Signed-off-by: Andy Zhou Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 29 ----------------------------- net/openvswitch/datapath.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 29 deletions(-) (limited to 'net/openvswitch/datapath.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 4d38ac044cee..6e098035bb8f 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -142,35 +142,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, const struct dp_upcall_info *, uint32_t cutlen); -/* Must be called with rcu_read_lock. */ -static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex) -{ - struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex); - - if (dev) { - struct vport *vport = ovs_internal_dev_get_vport(dev); - if (vport) - return vport->dp; - } - - return NULL; -} - -/* The caller must hold either ovs_mutex or rcu_read_lock to keep the - * returned dp pointer valid. - */ -static inline struct datapath *get_dp(struct net *net, int dp_ifindex) -{ - struct datapath *dp; - - WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); - rcu_read_lock(); - dp = get_dp_rcu(net, dp_ifindex); - rcu_read_unlock(); - - return dp; -} - /* Must be called with rcu_read_lock or ovs_mutex. */ const char *ovs_dp_name(const struct datapath *dp) { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 4a104ef9e12c..954c4ed465a5 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -30,6 +30,7 @@ #include "conntrack.h" #include "flow.h" #include "flow_table.h" +#include "vport-internal_dev.h" #define DP_MAX_PORTS USHRT_MAX #define DP_VPORT_HASH_BUCKETS 1024 @@ -190,6 +191,36 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n return ovs_lookup_vport(dp, port_no); } +/* Must be called with rcu_read_lock. */ +static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex) +{ + struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex); + + if (dev) { + struct vport *vport = ovs_internal_dev_get_vport(dev); + + if (vport) + return vport->dp; + } + + return NULL; +} + +/* The caller must hold either ovs_mutex or rcu_read_lock to keep the + * returned dp pointer valid. + */ +static inline struct datapath *get_dp(struct net *net, int dp_ifindex) +{ + struct datapath *dp; + + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); + rcu_read_lock(); + dp = get_dp_rcu(net, dp_ifindex); + rcu_read_unlock(); + + return dp; +} + extern struct notifier_block ovs_dp_device_notifier; extern struct genl_family dp_vport_genl_family; -- cgit v1.2.3-70-g09d2 From 96fbc13d7e770b542d2d1fcf700d0baadc6e8063 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Fri, 10 Nov 2017 12:09:42 -0800 Subject: openvswitch: Add meter infrastructure OVS kernel datapath so far does not support Openflow meter action. This is the first stab at adding kernel datapath meter support. This implementation supports only drop band type. Signed-off-by: Andy Zhou Signed-off-by: David S. Miller --- net/openvswitch/Makefile | 1 + net/openvswitch/datapath.c | 14 +- net/openvswitch/datapath.h | 3 + net/openvswitch/meter.c | 604 +++++++++++++++++++++++++++++++++++++++++++++ net/openvswitch/meter.h | 54 ++++ 5 files changed, 674 insertions(+), 2 deletions(-) create mode 100644 net/openvswitch/meter.c create mode 100644 net/openvswitch/meter.h (limited to 'net/openvswitch/datapath.c') diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index 299f4476cf44..41109c326f3a 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile @@ -12,6 +12,7 @@ openvswitch-y := \ flow.o \ flow_netlink.o \ flow_table.o \ + meter.o \ vport.o \ vport-internal_dev.o \ vport-netdev.o diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6e098035bb8f..0dab33fb9844 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -55,6 +55,7 @@ #include "flow.h" #include "flow_table.h" #include "flow_netlink.h" +#include "meter.h" #include "vport-internal_dev.h" #include "vport-netdev.h" @@ -174,6 +175,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) ovs_flow_tbl_destroy(&dp->table); free_percpu(dp->stats_percpu); kfree(dp->ports); + ovs_meters_exit(dp); kfree(dp); } @@ -1572,6 +1574,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) INIT_HLIST_HEAD(&dp->ports[i]); + err = ovs_meters_init(dp); + if (err) + goto err_destroy_ports_array; + /* Set up our datapath device. */ parms.name = nla_data(a[OVS_DP_ATTR_NAME]); parms.type = OVS_VPORT_TYPE_INTERNAL; @@ -1600,7 +1606,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_reset_user_features(skb, info); } - goto err_destroy_ports_array; + goto err_destroy_meters; } err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, @@ -1615,8 +1621,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -err_destroy_ports_array: +err_destroy_meters: ovs_unlock(); + ovs_meters_exit(dp); +err_destroy_ports_array: kfree(dp->ports); err_destroy_percpu: free_percpu(dp->stats_percpu); @@ -2265,6 +2273,7 @@ static struct genl_family * const dp_genl_families[] = { &dp_vport_genl_family, &dp_flow_genl_family, &dp_packet_genl_family, + &dp_meter_genl_family, }; static void dp_unregister_genl(int n_families) @@ -2445,3 +2454,4 @@ MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY); MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY); MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY); MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 954c4ed465a5..5d2997b42460 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -92,6 +92,9 @@ struct datapath { u32 user_features; u32 max_headroom; + + /* Switch meters. */ + struct hlist_head *meters; }; /** diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c new file mode 100644 index 000000000000..2a5ba356c472 --- /dev/null +++ b/net/openvswitch/meter.c @@ -0,0 +1,604 @@ +/* + * Copyright (c) 2017 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "datapath.h" +#include "meter.h" + +#define METER_HASH_BUCKETS 1024 + +static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = { + [OVS_METER_ATTR_ID] = { .type = NLA_U32, }, + [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG }, + [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, + [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED }, + [OVS_METER_ATTR_USED] = { .type = NLA_U64 }, + [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG }, + [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 }, + [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 }, +}; + +static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = { + [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, }, + [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, }, + [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, }, + [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, +}; + +static void rcu_free_ovs_meter_callback(struct rcu_head *rcu) +{ + struct dp_meter *meter = container_of(rcu, struct dp_meter, rcu); + + kfree(meter); +} + +static void ovs_meter_free(struct dp_meter *meter) +{ + if (!meter) + return; + + call_rcu(&meter->rcu, rcu_free_ovs_meter_callback); +} + +static struct hlist_head *meter_hash_bucket(const struct datapath *dp, + u32 meter_id) +{ + return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)]; +} + +/* Call with ovs_mutex or RCU read lock. */ +static struct dp_meter *lookup_meter(const struct datapath *dp, + u32 meter_id) +{ + struct dp_meter *meter; + struct hlist_head *head; + + head = meter_hash_bucket(dp, meter_id); + hlist_for_each_entry_rcu(meter, head, dp_hash_node) { + if (meter->id == meter_id) + return meter; + } + return NULL; +} + +static void attach_meter(struct datapath *dp, struct dp_meter *meter) +{ + struct hlist_head *head = meter_hash_bucket(dp, meter->id); + + hlist_add_head_rcu(&meter->dp_hash_node, head); +} + +static void detach_meter(struct dp_meter *meter) +{ + ASSERT_OVSL(); + if (meter) + hlist_del_rcu(&meter->dp_hash_node); +} + +static struct sk_buff * +ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd, + struct ovs_header **ovs_reply_header) +{ + struct sk_buff *skb; + struct ovs_header *ovs_header = info->userhdr; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!skb) + return ERR_PTR(-ENOMEM); + + *ovs_reply_header = genlmsg_put(skb, info->snd_portid, + info->snd_seq, + &dp_meter_genl_family, 0, cmd); + if (!ovs_reply_header) { + nlmsg_free(skb); + return ERR_PTR(-EMSGSIZE); + } + (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex; + + return skb; +} + +static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, + struct dp_meter *meter) +{ + struct nlattr *nla; + struct dp_meter_band *band; + u16 i; + + if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id)) + goto error; + + if (!meter) + return 0; + + if (nla_put(reply, OVS_METER_ATTR_STATS, + sizeof(struct ovs_flow_stats), &meter->stats) || + nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used, + OVS_METER_ATTR_PAD)) + goto error; + + nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS); + if (!nla) + goto error; + + band = meter->bands; + + for (i = 0; i < meter->n_bands; ++i, ++band) { + struct nlattr *band_nla; + + band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC); + if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS, + sizeof(struct ovs_flow_stats), + &band->stats)) + goto error; + nla_nest_end(reply, band_nla); + } + nla_nest_end(reply, nla); + + return 0; +error: + return -EMSGSIZE; +} + +static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *reply; + struct ovs_header *ovs_reply_header; + struct nlattr *nla, *band_nla; + int err; + + reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES, + &ovs_reply_header); + if (!reply) + return PTR_ERR(reply); + + if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) || + nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) + goto nla_put_failure; + + nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS); + if (!nla) + goto nla_put_failure; + + band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC); + if (!band_nla) + goto nla_put_failure; + /* Currently only DROP band type is supported. */ + if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP)) + goto nla_put_failure; + nla_nest_end(reply, band_nla); + nla_nest_end(reply, nla); + + genlmsg_end(reply, ovs_reply_header); + return genlmsg_reply(reply, info); + +nla_put_failure: + nlmsg_free(reply); + err = -EMSGSIZE; + return err; +} + +static struct dp_meter *dp_meter_create(struct nlattr **a) +{ + struct nlattr *nla; + int rem; + u16 n_bands = 0; + struct dp_meter *meter; + struct dp_meter_band *band; + int err; + + /* Validate attributes, count the bands. */ + if (!a[OVS_METER_ATTR_BANDS]) + return ERR_PTR(-EINVAL); + + nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) + if (++n_bands > DP_MAX_BANDS) + return ERR_PTR(-EINVAL); + + /* Allocate and set up the meter before locking anything. */ + meter = kzalloc(n_bands * sizeof(struct dp_meter_band) + + sizeof(*meter), GFP_KERNEL); + if (!meter) + return ERR_PTR(-ENOMEM); + + meter->used = div_u64(ktime_get_ns(), 1000 * 1000); + meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0; + meter->keep_stats = !a[OVS_METER_ATTR_CLEAR]; + spin_lock_init(&meter->lock); + if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) { + meter->stats = *(struct ovs_flow_stats *) + nla_data(a[OVS_METER_ATTR_STATS]); + } + meter->n_bands = n_bands; + + /* Set up meter bands. */ + band = meter->bands; + nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) { + struct nlattr *attr[OVS_BAND_ATTR_MAX + 1]; + u32 band_max_delta_t; + + err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX, + nla_data(nla), nla_len(nla), band_policy, + NULL); + if (err) + goto exit_free_meter; + + if (!attr[OVS_BAND_ATTR_TYPE] || + !attr[OVS_BAND_ATTR_RATE] || + !attr[OVS_BAND_ATTR_BURST]) { + err = -EINVAL; + goto exit_free_meter; + } + + band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]); + band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]); + band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]); + /* Figure out max delta_t that is enough to fill any bucket. + * Keep max_delta_t size to the bucket units: + * pkts => 1/1000 packets, kilobits => bits. + */ + band_max_delta_t = (band->burst_size + band->rate) * 1000; + /* Start with a full bucket. */ + band->bucket = band_max_delta_t; + if (band_max_delta_t > meter->max_delta_t) + meter->max_delta_t = band_max_delta_t; + band++; + } + + return meter; + +exit_free_meter: + kfree(meter); + return ERR_PTR(err); +} + +static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct dp_meter *meter, *old_meter; + struct sk_buff *reply; + struct ovs_header *ovs_reply_header; + struct ovs_header *ovs_header = info->userhdr; + struct datapath *dp; + int err; + u32 meter_id; + bool failed; + + meter = dp_meter_create(a); + if (IS_ERR_OR_NULL(meter)) + return PTR_ERR(meter); + + reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET, + &ovs_reply_header); + if (IS_ERR(reply)) { + err = PTR_ERR(reply); + goto exit_free_meter; + } + + ovs_lock(); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + if (!dp) { + err = -ENODEV; + goto exit_unlock; + } + + if (!a[OVS_METER_ATTR_ID]) { + err = -ENODEV; + goto exit_unlock; + } + + meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); + + /* Cannot fail after this. */ + old_meter = lookup_meter(dp, meter_id); + detach_meter(old_meter); + attach_meter(dp, meter); + ovs_unlock(); + + /* Build response with the meter_id and stats from + * the old meter, if any. + */ + failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id); + WARN_ON(failed); + if (old_meter) { + spin_lock_bh(&old_meter->lock); + if (old_meter->keep_stats) { + err = ovs_meter_cmd_reply_stats(reply, meter_id, + old_meter); + WARN_ON(err); + } + spin_unlock_bh(&old_meter->lock); + ovs_meter_free(old_meter); + } + + genlmsg_end(reply, ovs_reply_header); + return genlmsg_reply(reply, info); + +exit_unlock: + ovs_unlock(); + nlmsg_free(reply); +exit_free_meter: + kfree(meter); + return err; +} + +static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + u32 meter_id; + struct ovs_header *ovs_header = info->userhdr; + struct ovs_header *ovs_reply_header; + struct datapath *dp; + int err; + struct sk_buff *reply; + struct dp_meter *meter; + + if (!a[OVS_METER_ATTR_ID]) + return -EINVAL; + + meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); + + reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET, + &ovs_reply_header); + if (IS_ERR(reply)) + return PTR_ERR(reply); + + ovs_lock(); + + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + if (!dp) { + err = -ENODEV; + goto exit_unlock; + } + + /* Locate meter, copy stats. */ + meter = lookup_meter(dp, meter_id); + if (!meter) { + err = -ENOENT; + goto exit_unlock; + } + + spin_lock_bh(&meter->lock); + err = ovs_meter_cmd_reply_stats(reply, meter_id, meter); + spin_unlock_bh(&meter->lock); + if (err) + goto exit_unlock; + + ovs_unlock(); + + genlmsg_end(reply, ovs_reply_header); + return genlmsg_reply(reply, info); + +exit_unlock: + ovs_unlock(); + nlmsg_free(reply); + return err; +} + +static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + u32 meter_id; + struct ovs_header *ovs_header = info->userhdr; + struct ovs_header *ovs_reply_header; + struct datapath *dp; + int err; + struct sk_buff *reply; + struct dp_meter *old_meter; + + if (!a[OVS_METER_ATTR_ID]) + return -EINVAL; + meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); + + reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL, + &ovs_reply_header); + if (IS_ERR(reply)) + return PTR_ERR(reply); + + ovs_lock(); + + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + if (!dp) { + err = -ENODEV; + goto exit_unlock; + } + + old_meter = lookup_meter(dp, meter_id); + if (old_meter) { + spin_lock_bh(&old_meter->lock); + err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter); + WARN_ON(err); + spin_unlock_bh(&old_meter->lock); + detach_meter(old_meter); + } + ovs_unlock(); + ovs_meter_free(old_meter); + genlmsg_end(reply, ovs_reply_header); + return genlmsg_reply(reply, info); + +exit_unlock: + ovs_unlock(); + nlmsg_free(reply); + return err; +} + +/* Meter action execution. + * + * Return true 'meter_id' drop band is triggered. The 'skb' should be + * dropped by the caller'. + */ +bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, u32 meter_id) +{ + struct dp_meter *meter; + struct dp_meter_band *band; + long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000); + long long int long_delta_ms; + u32 delta_ms; + u32 cost; + int i, band_exceeded_max = -1; + u32 band_exceeded_rate = 0; + + meter = lookup_meter(dp, meter_id); + /* Do not drop the packet when there is no meter. */ + if (!meter) + return false; + + /* Lock the meter while using it. */ + spin_lock(&meter->lock); + + long_delta_ms = (now_ms - meter->used); /* ms */ + + /* Make sure delta_ms will not be too large, so that bucket will not + * wrap around below. + */ + delta_ms = (long_delta_ms > (long long int)meter->max_delta_t) + ? meter->max_delta_t : (u32)long_delta_ms; + + /* Update meter statistics. + */ + meter->used = now_ms; + meter->stats.n_packets += 1; + meter->stats.n_bytes += skb->len; + + /* Bucket rate is either in kilobits per second, or in packets per + * second. We maintain the bucket in the units of either bits or + * 1/1000th of a packet, correspondingly. + * Then, when rate is multiplied with milliseconds, we get the + * bucket units: + * msec * kbps = bits, and + * msec * packets/sec = 1/1000 packets. + * + * 'cost' is the number of bucket units in this packet. + */ + cost = (meter->kbps) ? skb->len * 8 : 1000; + + /* Update all bands and find the one hit with the highest rate. */ + for (i = 0; i < meter->n_bands; ++i) { + long long int max_bucket_size; + + band = &meter->bands[i]; + max_bucket_size = (band->burst_size + band->rate) * 1000; + + band->bucket += delta_ms * band->rate; + if (band->bucket > max_bucket_size) + band->bucket = max_bucket_size; + + if (band->bucket >= cost) { + band->bucket -= cost; + } else if (band->rate > band_exceeded_rate) { + band_exceeded_rate = band->rate; + band_exceeded_max = i; + } + } + + if (band_exceeded_max >= 0) { + /* Update band statistics. */ + band = &meter->bands[band_exceeded_max]; + band->stats.n_packets += 1; + band->stats.n_bytes += skb->len; + + /* Drop band triggered, let the caller drop the 'skb'. */ + if (band->type == OVS_METER_BAND_TYPE_DROP) { + spin_unlock(&meter->lock); + return true; + } + } + + spin_unlock(&meter->lock); + return false; +} + +static struct genl_ops dp_meter_genl_ops[] = { + { .cmd = OVS_METER_CMD_FEATURES, + .flags = 0, /* OK for unprivileged users. */ + .policy = meter_policy, + .doit = ovs_meter_cmd_features + }, + { .cmd = OVS_METER_CMD_SET, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN + * privilege. + */ + .policy = meter_policy, + .doit = ovs_meter_cmd_set, + }, + { .cmd = OVS_METER_CMD_GET, + .flags = 0, /* OK for unprivileged users. */ + .policy = meter_policy, + .doit = ovs_meter_cmd_get, + }, + { .cmd = OVS_METER_CMD_DEL, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN + * privilege. + */ + .policy = meter_policy, + .doit = ovs_meter_cmd_del + }, +}; + +static const struct genl_multicast_group ovs_meter_multicast_group = { + .name = OVS_METER_MCGROUP, +}; + +struct genl_family dp_meter_genl_family __ro_after_init = { + .hdrsize = sizeof(struct ovs_header), + .name = OVS_METER_FAMILY, + .version = OVS_METER_VERSION, + .maxattr = OVS_METER_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = dp_meter_genl_ops, + .n_ops = ARRAY_SIZE(dp_meter_genl_ops), + .mcgrps = &ovs_meter_multicast_group, + .n_mcgrps = 1, + .module = THIS_MODULE, +}; + +int ovs_meters_init(struct datapath *dp) +{ + int i; + + dp->meters = kmalloc_array(METER_HASH_BUCKETS, + sizeof(struct hlist_head), GFP_KERNEL); + + if (!dp->meters) + return -ENOMEM; + + for (i = 0; i < METER_HASH_BUCKETS; i++) + INIT_HLIST_HEAD(&dp->meters[i]); + + return 0; +} + +void ovs_meters_exit(struct datapath *dp) +{ + int i; + + for (i = 0; i < METER_HASH_BUCKETS; i++) { + struct hlist_head *head = &dp->meters[i]; + struct dp_meter *meter; + struct hlist_node *n; + + hlist_for_each_entry_safe(meter, n, head, dp_hash_node) + kfree(meter); + } + + kfree(dp->meters); +} diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h new file mode 100644 index 000000000000..964ace2650f8 --- /dev/null +++ b/net/openvswitch/meter.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#ifndef METER_H +#define METER_H 1 + +#include +#include +#include +#include +#include +#include +#include + +#include "flow.h" +struct datapath; + +#define DP_MAX_BANDS 1 + +struct dp_meter_band { + u32 type; + u32 rate; + u32 burst_size; + u32 bucket; /* 1/1000 packets, or in bits */ + struct ovs_flow_stats stats; +}; + +struct dp_meter { + spinlock_t lock; /* Per meter lock */ + struct rcu_head rcu; + struct hlist_node dp_hash_node; /*Element in datapath->meters + * hash table. + */ + u32 id; + u16 kbps:1, keep_stats:1; + u16 n_bands; + u32 max_delta_t; + u64 used; + struct ovs_flow_stats stats; + struct dp_meter_band bands[]; +}; + +extern struct genl_family dp_meter_genl_family; +int ovs_meters_init(struct datapath *dp); +void ovs_meters_exit(struct datapath *dp); +bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, u32 meter_id); + +#endif /* meter.h */ -- cgit v1.2.3-70-g09d2