Merge branch 'ebpf-next'

Alexei Starovoitov says: ==================== allow eBPF programs to be attached to sockets V1->V2: fixed comments in sample code to state clearly that packet data is accessed with LD_ABS instructions and not internal skb fields. Also replaced constants in: BPF_LD_ABS(BPF_B, 14 + 9 /* R0 = ip->proto */), with: BPF_LD_ABS(BPF_B, ETH_HLEN + offsetof(struct iphdr, protocol) /* R0 = ip->proto */), V1 cover: Introduce BPF_PROG_TYPE_SOCKET_FILTER type of eBPF programs that can be attached to sockets with setsockopt(). Allow such programs to access maps via lookup/update/delete helpers. This feature was previewed by bpf manpage in commit b4fc1a460f30("Merge branch 'bpf-next'"). Now it can actually run. 1st patch adds LD_ABS/LD_IND instruction verification and 2nd patch adds new setsockopt() flag. Patches 3-6 are examples in assembler and in C. Though native eBPF programs are way more powerful than classic filters (attachable through similar setsockopt() call), they don't have skb field accessors yet. For example, skb->pkt_type and skb->dev->ifindex are not accessible. There are several ways to achieve that. That will be in the next set of patches. So in this set native eBPF programs can only read data from packet and access maps. The most powerful example is sockex2_kern.c from patch 6 where ~200 lines of C are compiled into ~300 of eBPF instructions. It shows how quite complex packet parsing can be done. LLVM used to build examples is at https://github.com/iovisor/llvm which is a fork of llvm trunk that I'm cleaning up for upstreaming. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@davemloft.net> 2014-12-05 21:47:48 -0800
committer: David S. Miller <davem@davemloft.net> 2014-12-05 21:47:48 -0800
commit: 8d0c4697534a739725e429ff062dea393d8860d1 (patch)
tree: ee28163a6c53e0131fd2d3d626d02b0610eaed2b /net
parent: f51a5e82ea9aaf05106c00d976e772ca384a9199 (diff)
parent: fbe3310840c65f3cf97dd90d23e177d061c376f2 (diff)
2 files changed, 108 insertions, 2 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index 647b12265e18..8cc3c03078b3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -44,6 +44,7 @@
 #include <linux/ratelimit.h>
 #include <linux/seccomp.h>
 #include <linux/if_vlan.h>
+#include <linux/bpf.h>
 
 /**
  *	sk_filter - run a packet through a socket filter
@@ -813,8 +814,12 @@ static void bpf_release_orig_filter(struct bpf_prog *fp)
 
 static void __bpf_prog_release(struct bpf_prog *prog)
 {
-	bpf_release_orig_filter(prog);
-	bpf_prog_free(prog);
+	if (prog->aux->prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
+		bpf_prog_put(prog);	/* presumably drops the bpf_prog_get() reference taken in sk_attach_bpf() */
+	} else {	/* every other program type keeps the previous release sequence */
+		bpf_release_orig_filter(prog);
+		bpf_prog_free(prog);
+	}
 }
 
 static void __sk_filter_release(struct sk_filter *fp)
@@ -1088,6 +1093,94 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(sk_attach_filter);
 
+#ifdef CONFIG_BPF_SYSCALL
+/* sk_attach_bpf - attach the eBPF program behind file descriptor @ufd
+ * to socket @sk as its filter, replacing any filter already installed.
+ *
+ * Returns 0 on success, -EPERM if SOCK_FILTER_LOCKED is set on @sk, the
+ * error reported by bpf_prog_get() if @ufd cannot be resolved, -EINVAL if
+ * the program is not of type BPF_PROG_TYPE_SOCKET_FILTER, or -ENOMEM if
+ * the sk_filter cannot be allocated or charged to the socket.
+ */
+int sk_attach_bpf(u32 ufd, struct sock *sk)
+{
+	struct sk_filter *fp, *old_fp;
+	struct bpf_prog *prog;
+
+	if (sock_flag(sk, SOCK_FILTER_LOCKED))
+		return -EPERM;
+
+	/* bpf_prog_get() signals failure with ERR_PTR(), never with NULL;
+	 * a NULL test would let the error pointer be dereferenced below
+	 */
+	prog = bpf_prog_get(ufd);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	if (prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
+		/* valid fd, but invalid program type */
+		bpf_prog_put(prog);
+		return -EINVAL;
+	}
+
+	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
+	if (!fp) {
+		bpf_prog_put(prog);
+		return -ENOMEM;
+	}
+	fp->prog = prog;
+
+	atomic_set(&fp->refcnt, 0);
+
+	/* from here on fp holds the program, so release goes through fp */
+	if (!sk_filter_charge(sk, fp)) {
+		__sk_filter_release(fp);
+		return -ENOMEM;
+	}
+
+	old_fp = rcu_dereference_protected(sk->sk_filter,
+					   sock_owned_by_user(sk));
+	rcu_assign_pointer(sk->sk_filter, fp);
+
+	if (old_fp)
+		sk_filter_uncharge(sk, old_fp);
+
+	return 0;
+}
+
+/* map a helper id to its prototype for socket filter programs: only
+ * bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem()
+ * are allowed; any other func_id yields NULL
+ */
+static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_map_lookup_elem:
+		return &bpf_map_lookup_elem_proto;
+	case BPF_FUNC_map_update_elem:
+		return &bpf_map_update_elem_proto;
+	case BPF_FUNC_map_delete_elem:
+		return &bpf_map_delete_elem_proto;
+	default:
+		return NULL;
+	}
+}
+
+static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type)
+{
+	/* skb fields cannot be accessed yet, so every access is rejected
+	 * regardless of off, size and type
+	 */
+	return false;
+}
+
+static struct bpf_verifier_ops sock_filter_ops = {
+	.get_func_proto = sock_filter_func_proto,	/* helper id -> prototype, NULL if not allowed */
+	.is_valid_access = sock_filter_is_valid_access,	/* currently always false */
+};
+
+static struct bpf_prog_type_list tl = {
+	.ops = &sock_filter_ops,	/* ops paired with the program type below */
+	.type = BPF_PROG_TYPE_SOCKET_FILTER,
+};
+
+static int __init register_sock_filter_ops(void)
+{
+	bpf_register_prog_type(&tl);	/* register the type/ops pair declared in tl */
+	return 0;	/* always reports success */
+}
+late_initcall(register_sock_filter_ops);
+#else
+int sk_attach_bpf(u32 ufd, struct sock *sk)
+{
+	return -EOPNOTSUPP;	/* stub used when CONFIG_BPF_SYSCALL is not defined */
+}
+#endif
 int sk_detach_filter(struct sock *sk)
 {
 	int ret = -ENOENT;
diff --git a/net/core/sock.c b/net/core/sock.c
index 0725cf0cb685..9a56b2000c3f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -888,6 +888,19 @@ set_rcvbuf:
 		}
 		break;
 
+	case SO_ATTACH_BPF:
+		ret = -EINVAL;
+		if (optlen == sizeof(u32)) {
+			u32 ufd;
+
+			ret = -EFAULT;
+			if (copy_from_user(&ufd, optval, sizeof(ufd)))
+				break;
+
+			ret = sk_attach_bpf(ufd, sk);
+		}
+		break;
+
 	case SO_DETACH_FILTER:
 		ret = sk_detach_filter(sk);
 		break;
author	David S. Miller <davem@davemloft.net>	2014-12-05 21:47:48 -0800
committer	David S. Miller <davem@davemloft.net>	2014-12-05 21:47:48 -0800
commit	8d0c4697534a739725e429ff062dea393d8860d1 (patch)
tree	ee28163a6c53e0131fd2d3d626d02b0610eaed2b /net
parent	f51a5e82ea9aaf05106c00d976e772ca384a9199 (diff)
parent	fbe3310840c65f3cf97dd90d23e177d061c376f2 (diff)