summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/net/busy_poller.c
blob: 99b0e8c17fcade73602ac408c7f23c12c28e9543 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
// SPDX-License-Identifier: GPL-2.0
#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <ynl.h>

#include <arpa/inet.h>
#include <netinet/in.h>

#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/types.h>

#include <linux/genetlink.h>
#include <linux/netlink.h>

#include "netdev-user.h"

/* The below ifdef blob is required because:
 *
 * - sys/epoll.h does not (yet) have the ioctl definitions included. So,
 *   systems with older glibcs will not have them available. However,
 *   sys/epoll.h does include the type definition for epoll_data, which is
 *   needed by the user program (e.g. epoll_event.data.fd)
 *
 * - linux/eventpoll.h does not define the epoll_data type, it is simply an
 *   opaque __u64. It does, however, include the ioctl definition.
 *
 * Including both headers is impossible (types would be redefined), so I've
 * opted instead to take sys/epoll.h, and include the blob below.
 *
 * Someday, when glibc is globally up to date, the blob below can be removed.
 */
/* Fallback definitions, compiled only when the system headers included above
 * have not already defined EPOLL_IOC_TYPE.
 */
#if !defined(EPOLL_IOC_TYPE)
/* Argument block for the EPIOCSPARAMS / EPIOCGPARAMS ioctls defined below.
 * run_poller() fills it from the -u, -g and -P command line options.
 */
struct epoll_params {
	uint32_t busy_poll_usecs;	/* from cfg_busy_poll_usecs (-u) */
	uint16_t busy_poll_budget;	/* from cfg_busy_poll_budget (-g) */
	uint8_t prefer_busy_poll;	/* from cfg_prefer_busy_poll (-P), 0 or 1 */

	/* pad the struct to a multiple of 64bits */
	uint8_t __pad;
};

/* ioctl type byte shared by the epoll ioctls below */
#define EPOLL_IOC_TYPE 0x8A
/* write a struct epoll_params to an epoll fd */
#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
/* read a struct epoll_params back from an epoll fd */
#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
#endif

/* Runtime configuration, filled in by parse_opts() from the command line. */

/* -p: TCP port the poller listens on (range checked to <= 65535) */
static uint32_t cfg_port = 8000;
/* -b: IPv4 address the listen socket is bound to */
static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY };
/* -o: path of the file that receives every byte read from the connection */
static char *cfg_outfile;
/* -m: size of the event array handed to epoll_wait() */
static int cfg_max_events = 8;
/* -i: interface index whose NAPI is configured; must be non-zero */
static int cfg_ifindex;

/* busy poll params, applied to the epoll fd via EPIOCSPARAMS (-u, -g, -P) */
static uint32_t cfg_busy_poll_usecs;
static uint32_t cfg_busy_poll_budget;
static uint32_t cfg_prefer_busy_poll;

/* IRQ params, applied to the NAPI via netlink in setup_queue() (-d, -r, -s) */
static uint32_t cfg_defer_hard_irqs;
static uint64_t cfg_gro_flush_timeout;
static uint64_t cfg_irq_suspend_timeout;

/* Print the command line synopsis for the program named @filepath and
 * terminate with exit status 1. Never returns.
 */
static void usage(const char *filepath)
{
	error(1, 0,
	      "Usage: %s"
	      " -p<port>"
	      " -b<addr>"
	      " -m<max_events>"
	      " -u<busy_poll_usecs>"
	      " -P<prefer_busy_poll>"
	      " -g<busy_poll_budget>"
	      " -o<outfile>"
	      " -d<defer_hard_irqs>"
	      " -r<gro_flush_timeout>"
	      " -s<irq_suspend_timeout>"
	      " -i<ifindex>",
	      filepath);
}

/* Parse @arg as an unsigned integer (base auto-detected, as with strtoull)
 * and exit with ERANGE and @errmsg if the conversion overflows or the value
 * exceeds @max.
 *
 * The value is kept in an unsigned long long until after the bounds check.
 * Storing the result of strtoul() directly in a 32-bit variable truncates it
 * first, which makes any later comparison against ULONG_MAX or UINT32_MAX
 * meaningless.
 */
static unsigned long long parse_bounded(const char *arg,
					unsigned long long max,
					const char *errmsg)
{
	unsigned long long val;

	errno = 0;
	val = strtoull(arg, NULL, 0);
	if (errno == ERANGE || val > max)
		error(1, ERANGE, "%s", errmsg);

	return val;
}

/* Parse the command line into the cfg_* globals.
 *
 * Every numeric option is range checked before it is narrowed to the type of
 * its cfg_* variable. Exits via usage() when no arguments are given, when an
 * unknown option or a stray positional argument is seen, or when one of the
 * mandatory options (-i ifindex, -o outfile) is missing. Exits with ERANGE
 * when a numeric option is out of range.
 */
static void parse_opts(int argc, char **argv)
{
	unsigned long long tmp;
	int ret;
	int c;

	if (argc <= 1)
		usage(argv[0]);

	while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) {
		switch (c) {
		case 'u':
			cfg_busy_poll_usecs =
				parse_bounded(optarg, UINT32_MAX,
					      "busy_poll_usecs too large");
			break;
		case 'P':
			cfg_prefer_busy_poll =
				parse_bounded(optarg, 1,
					      "prefer busy poll should be 0 or 1");
			break;
		case 'g':
			cfg_busy_poll_budget =
				parse_bounded(optarg, UINT16_MAX,
					      "busy poll budget must be [0, UINT16_MAX]");
			break;
		case 'p':
			cfg_port = parse_bounded(optarg, UINT16_MAX,
						 "port must be <= 65535");
			break;
		case 'b':
			ret = inet_aton(optarg, &cfg_bind_addr);
			/* inet_aton() does not set errno, so don't report it */
			if (ret == 0)
				error(1, 0,
				      "bind address %s invalid", optarg);
			break;
		case 'o':
			/* a repeated -o replaces the earlier value */
			free(cfg_outfile);
			cfg_outfile = strdup(optarg);
			if (!cfg_outfile)
				error(1, 0, "outfile invalid");
			break;
		case 'm':
			/* negative input wraps to a huge value and is
			 * rejected by the upper bound; zero is rejected here
			 */
			tmp = parse_bounded(optarg, INT_MAX,
					    "max events must be > 0 and <= INT_MAX");
			if (tmp == 0)
				error(1, ERANGE,
				      "max events must be > 0 and <= INT_MAX");
			cfg_max_events = (int)tmp;
			break;
		case 'd':
			cfg_defer_hard_irqs =
				parse_bounded(optarg, INT32_MAX,
					      "defer_hard_irqs must be <= INT32_MAX");
			break;
		case 'r':
			cfg_gro_flush_timeout =
				parse_bounded(optarg, ULLONG_MAX - 1,
					      "gro_flush_timeout must be < ULLONG_MAX");
			break;
		case 's':
			cfg_irq_suspend_timeout =
				parse_bounded(optarg, ULLONG_MAX - 1,
					      "irq_suspend_timeout must be < ULLONG_MAX");
			break;
		case 'i':
			cfg_ifindex = (int)parse_bounded(optarg, INT_MAX,
							 "ifindex must be <= INT_MAX");
			break;
		default:
			/* getopt() has already printed a diagnostic */
			usage(argv[0]);
			break;
		}
	}

	/* run_poller() opens cfg_outfile unconditionally and setup_queue()
	 * needs a real interface, so both options are mandatory
	 */
	if (!cfg_ifindex || !cfg_outfile)
		usage(argv[0]);

	if (optind != argc)
		usage(argv[0]);
}

/* Register @fd with the epoll instance @epfd for the event mask @events.
 * The fd is also stored as the event's user data so that the event loop can
 * tell which descriptor fired. Exits with status 1 if the kernel rejects the
 * registration.
 */
static void epoll_ctl_add(int epfd, int fd, uint32_t events)
{
	struct epoll_event registration = {
		.events = events,
		.data.fd = fd,
	};
	int rc = epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &registration);

	if (rc != -1)
		return;

	error(1, errno, "epoll_ctl add fd: %d", fd);
}

/* Switch @sockfd to non-blocking mode while preserving its other file status
 * flags. Exits with status 1 if the flags cannot be read or updated.
 */
static void setnonblock(int sockfd)
{
	int flags;

	flags = fcntl(sockfd, F_GETFL, 0);
	/* a failed F_GETFL returns -1; OR-ing that into F_SETFL would request
	 * every flag at once, so bail out instead
	 */
	if (flags == -1)
		error(1, errno, "unable to get socket flags");

	if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1)
		error(1, errno, "unable to set socket to nonblocking mode");
}

/* Write exactly @buflen bytes from @buf to @fd, looping over short writes.
 * A write interrupted by a signal (EINTR) is retried; any other write error
 * terminates the program with status 1. A @buflen of zero or less writes
 * nothing.
 */
static void write_chunk(int fd, const char *buf, ssize_t buflen)
{
	const char *cursor = buf;
	ssize_t remaining = buflen;

	while (remaining > 0) {
		ssize_t written = write(fd, cursor, remaining);

		if (written == -1) {
			/* interrupted before any data was written: retry */
			if (errno == EINTR)
				continue;
			error(1, errno, "unable to write data to outfile");
		}

		cursor += written;
		remaining -= written;
	}
}

static void setup_queue(void)
{
	struct netdev_napi_get_list *napi_list = NULL;
	struct netdev_napi_get_req_dump *req = NULL;
	struct netdev_napi_set_req *set_req = NULL;
	struct ynl_sock *ys;
	struct ynl_error yerr;
	uint32_t napi_id;

	ys = ynl_sock_create(&ynl_netdev_family, &yerr);
	if (!ys)
		error(1, 0, "YNL: %s", yerr.msg);

	req = netdev_napi_get_req_dump_alloc();
	netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex);
	napi_list = netdev_napi_get_dump(ys, req);

	/* assume there is 1 NAPI configured and take the first */
	if (napi_list->obj._present.id)
		napi_id = napi_list->obj.id;
	else
		error(1, 0, "napi ID not present?");

	set_req = netdev_napi_set_req_alloc();
	netdev_napi_set_req_set_id(set_req, napi_id);
	netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs);
	netdev_napi_set_req_set_gro_flush_timeout(set_req,
						  cfg_gro_flush_timeout);
	netdev_napi_set_req_set_irq_suspend_timeout(set_req,
						    cfg_irq_suspend_timeout);

	if (netdev_napi_set(ys, set_req))
		error(1, 0, "can't set NAPI params: %s\n", yerr.msg);

	netdev_napi_get_list_free(napi_list);
	netdev_napi_get_req_dump_free(req);
	netdev_napi_set_req_free(set_req);
	ynl_sock_destroy(ys);
}

/* Accept a TCP connection on cfg_bind_addr:cfg_port and copy everything read
 * from it into cfg_outfile, using an epoll instance configured with the
 * busy poll parameters from the command line.
 *
 * Returns once an epoll event reports EPOLLRDHUP or EPOLLHUP, after closing
 * the descriptors it opened. Any setup or I/O failure terminates the program
 * with status 1.
 */
static void run_poller(void)
{
	struct epoll_params epoll_params = {0};
	struct sockaddr_in server_addr = {0};
	struct epoll_event *events;
	int i, epfd, nfds;
	ssize_t readlen;
	int outfile_fd;
	char buf[1024];
	int sockfd;
	int conn;
	int val;

	/* cfg_max_events comes from the command line, so keep the event
	 * array off the stack
	 */
	events = calloc(cfg_max_events, sizeof(*events));
	if (!events)
		error(1, errno, "unable to allocate %d epoll events",
		      cfg_max_events);

	outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644);
	if (outfile_fd == -1)
		error(1, errno, "unable to open outfile: %s", cfg_outfile);

	sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (sockfd == -1)
		error(1, errno, "unable to create listen socket");

	server_addr.sin_family = AF_INET;
	server_addr.sin_port = htons(cfg_port);
	server_addr.sin_addr = cfg_bind_addr;

	/* these values are range checked during parse_opts, so casting is safe
	 * here
	 */
	epoll_params.busy_poll_usecs = cfg_busy_poll_usecs;
	epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget;
	epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll;
	epoll_params.__pad = 0;

	val = 1;
	if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)))
		error(1, errno, "poller setsockopt reuseaddr");

	setnonblock(sockfd);

	/* a failed bind must be fatal: listening on an unbound socket would
	 * leave the poller waiting on a port nobody connects to
	 */
	if (bind(sockfd, (struct sockaddr *)&server_addr,
		 sizeof(struct sockaddr_in)))
		error(1, errno, "poller bind to port: %" PRIu32 "\n",
		      cfg_port);

	if (listen(sockfd, 1))
		error(1, errno, "poller listen");

	epfd = epoll_create1(0);
	if (epfd == -1)
		error(1, errno, "unable to create epoll instance");

	if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1)
		error(1, errno, "unable to set busy poll params");

	epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET);

	for (;;) {
		nfds = epoll_wait(epfd, events, cfg_max_events, -1);
		if (nfds == -1) {
			/* interrupted by a signal: just wait again */
			if (errno == EINTR)
				continue;
			error(1, errno, "epoll_wait failed");
		}

		for (i = 0; i < nfds; i++) {
			if (events[i].data.fd == sockfd) {
				conn = accept(sockfd, NULL, NULL);
				if (conn == -1)
					error(1, errno,
					      "accepting incoming connection failed");

				setnonblock(conn);
				epoll_ctl_add(epfd, conn,
					      EPOLLIN | EPOLLET | EPOLLRDHUP |
					      EPOLLHUP);
			} else if (events[i].events & EPOLLIN) {
				/* the connection is registered edge
				 * triggered, so drain it until read() stops
				 * returning data
				 */
				for (;;) {
					readlen = read(events[i].data.fd, buf,
						       sizeof(buf));
					if (readlen > 0)
						write_chunk(outfile_fd, buf,
							    readlen);
					else
						break;
				}
			} else {
				/* spurious event ? */
			}
			if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) {
				/* peer is gone: tear everything down */
				epoll_ctl(epfd, EPOLL_CTL_DEL,
					  events[i].data.fd, NULL);
				close(events[i].data.fd);
				close(outfile_fd);
				close(sockfd);
				close(epfd);
				free(events);
				return;
			}
		}
	}
}

/* Entry point: read the configuration from the command line, apply the NAPI
 * settings over netlink, then serve a connection until the peer hangs up.
 */
int main(int argc, char *argv[])
{
	parse_opts(argc, argv);

	setup_queue();

	run_poller();

	return EXIT_SUCCESS;
}