| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
 | /* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_STATIC_CALL_H
#define _LINUX_STATIC_CALL_H
/*
 * Static call support
 *
 * Static calls use code patching to hard-code function pointers into direct
 * branch instructions. They give the flexibility of function pointers, but
 * with improved performance. This is especially important for cases where
 * retpolines would otherwise be used, as retpolines can significantly impact
 * performance.
 *
 *
 * API overview:
 *
 *   DECLARE_STATIC_CALL(name, func);
 *   DEFINE_STATIC_CALL(name, func);
 *   DEFINE_STATIC_CALL_NULL(name, typename);
 *   DEFINE_STATIC_CALL_RET0(name, typename);
 *
 *   __static_call_return0;
 *
 *   static_call(name)(args...);
 *   static_call_cond(name)(args...);
 *   static_call_update(name, func);
 *   static_call_query(name);
 *
 *   EXPORT_STATIC_CALL{,_TRAMP}{,_GPL}()
 *
 * Usage example:
 *
 *   # Start with the following functions (with identical prototypes):
 *   int func_a(int arg1, int arg2);
 *   int func_b(int arg1, int arg2);
 *
 *   # Define a 'my_name' reference, associated with func_a() by default
 *   DEFINE_STATIC_CALL(my_name, func_a);
 *
 *   # Call func_a()
 *   static_call(my_name)(arg1, arg2);
 *
 *   # Update 'my_name' to point to func_b()
 *   static_call_update(my_name, &func_b);
 *
 *   # Call func_b()
 *   static_call(my_name)(arg1, arg2);
 *
 *
 * Implementation details:
 *
 *   This requires some arch-specific code (CONFIG_HAVE_STATIC_CALL).
 *   Otherwise basic indirect calls are used (with function pointers).
 *
 *   Each static_call() site calls into a trampoline associated with the name.
 *   The trampoline has a direct branch to the default function.  Updates to a
 *   name will modify the trampoline's branch destination.
 *
 *   If the arch has CONFIG_HAVE_STATIC_CALL_INLINE, then the call sites
 *   themselves will be patched at runtime to call the functions directly,
 *   rather than calling through the trampoline.  This requires objtool or a
 *   compiler plugin to detect all the static_call() sites and annotate them
 *   in the .static_call_sites section.
 *
 *
 * Notes on NULL function pointers:
 *
 *   Static_call()s support NULL functions, with many of the caveats that
 *   regular function pointers have.
 *
 *   Clearly calling a NULL function pointer is 'BAD', so too for
 *   static_call()s (although when HAVE_STATIC_CALL it might not be immediately
 *   fatal). A NULL static_call can be the result of:
 *
 *     DECLARE_STATIC_CALL_NULL(my_static_call, void (*)(int));
 *
 *   which is equivalent to declaring a NULL function pointer with just a
 *   typename:
 *
 *     void (*my_func_ptr)(int arg1) = NULL;
 *
 *   or using static_call_update() with a NULL function. In both cases the
 *   HAVE_STATIC_CALL implementation will patch the trampoline with a RET
 *   instruction, instead of an immediate tail-call JMP. HAVE_STATIC_CALL_INLINE
 *   architectures can patch the trampoline call to a NOP.
 *
 *   In all cases, any argument evaluation is unconditional. Unlike a regular
 *   conditional function pointer call:
 *
 *     if (my_func_ptr)
 *         my_func_ptr(arg1)
 *
 *   where the argument evaludation also depends on the pointer value.
 *
 *   When calling a static_call that can be NULL, use:
 *
 *     static_call_cond(name)(arg1);
 *
 *   which will include the required value tests to avoid NULL-pointer
 *   dereferences.
 *
 *   To query which function is currently set to be called, use:
 *
 *   func = static_call_query(name);
 *
 *
 * DEFINE_STATIC_CALL_RET0 / __static_call_return0:
 *
 *   Just like how DEFINE_STATIC_CALL_NULL() / static_call_cond() optimize the
 *   conditional void function call, DEFINE_STATIC_CALL_RET0 /
 *   __static_call_return0 optimize the do nothing return 0 function.
 *
 *   This feature is strictly UB per the C standard (since it casts a function
 *   pointer to a different signature) and relies on the architecture ABI to
 *   make things work. In particular it relies on Caller Stack-cleanup and the
 *   whole return register being clobbered for short return values. All normal
 *   CDECL style ABIs conform.
 *
 *   In particular the x86_64 implementation replaces the 5 byte CALL
 *   instruction at the callsite with a 5 byte clear of the RAX register,
 *   completely eliding any function call overhead.
 *
 *   Notably argument setup is unconditional.
 *
 *
 * EXPORT_STATIC_CALL() vs EXPORT_STATIC_CALL_TRAMP():
 *
 *   The difference is that the _TRAMP variant tries to only export the
 *   trampoline with the result that a module can use static_call{,_cond}() but
 *   not static_call_update().
 *
 */
#include <linux/types.h>
#include <linux/cpu.h>
#include <linux/static_call_types.h>
#ifdef CONFIG_HAVE_STATIC_CALL
#include <asm/static_call.h>
/*
 * Either @site or @tramp can be NULL.
 */
extern void arch_static_call_transform(void *site, void *tramp, void *func, bool tail);
#define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name)
#else
#define STATIC_CALL_TRAMP_ADDR(name) NULL
#endif
#define static_call_update(name, func)					\
({									\
	typeof(&STATIC_CALL_TRAMP(name)) __F = (func);			\
	__static_call_update(&STATIC_CALL_KEY(name),			\
			     STATIC_CALL_TRAMP_ADDR(name), __F);	\
})
#define static_call_query(name) (READ_ONCE(STATIC_CALL_KEY(name).func))
#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
extern int __init static_call_init(void);
extern void static_call_force_reinit(void);
struct static_call_mod {
	struct static_call_mod *next;
	struct module *mod; /* for vmlinux, mod == NULL */
	struct static_call_site *sites;
};
/* For finding the key associated with a trampoline */
struct static_call_tramp_key {
	s32 tramp;
	s32 key;
};
extern void __static_call_update(struct static_call_key *key, void *tramp, void *func);
extern int static_call_mod_init(struct module *mod);
extern int static_call_text_reserved(void *start, void *end);
extern long __static_call_return0(void);
#define DEFINE_STATIC_CALL(name, _func)					\
	DECLARE_STATIC_CALL(name, _func);				\
	struct static_call_key STATIC_CALL_KEY(name) = {		\
		.func = _func,						\
		.type = 1,						\
	};								\
	ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)
#define DEFINE_STATIC_CALL_NULL(name, _func)				\
	DECLARE_STATIC_CALL(name, _func);				\
	struct static_call_key STATIC_CALL_KEY(name) = {		\
		.func = NULL,						\
		.type = 1,						\
	};								\
	ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
#define DEFINE_STATIC_CALL_RET0(name, _func)				\
	DECLARE_STATIC_CALL(name, _func);				\
	struct static_call_key STATIC_CALL_KEY(name) = {		\
		.func = __static_call_return0,				\
		.type = 1,						\
	};								\
	ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)
#define static_call_cond(name)	(void)__static_call(name)
#define EXPORT_STATIC_CALL(name)					\
	EXPORT_SYMBOL(STATIC_CALL_KEY(name));				\
	EXPORT_SYMBOL(STATIC_CALL_TRAMP(name))
#define EXPORT_STATIC_CALL_GPL(name)					\
	EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name));			\
	EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))
/* Leave the key unexported, so modules can't change static call targets: */
#define EXPORT_STATIC_CALL_TRAMP(name)					\
	EXPORT_SYMBOL(STATIC_CALL_TRAMP(name));				\
	ARCH_ADD_TRAMP_KEY(name)
#define EXPORT_STATIC_CALL_TRAMP_GPL(name)				\
	EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name));			\
	ARCH_ADD_TRAMP_KEY(name)
#elif defined(CONFIG_HAVE_STATIC_CALL)
static inline int static_call_init(void) { return 0; }
#define DEFINE_STATIC_CALL(name, _func)					\
	DECLARE_STATIC_CALL(name, _func);				\
	struct static_call_key STATIC_CALL_KEY(name) = {		\
		.func = _func,						\
	};								\
	ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)
#define DEFINE_STATIC_CALL_NULL(name, _func)				\
	DECLARE_STATIC_CALL(name, _func);				\
	struct static_call_key STATIC_CALL_KEY(name) = {		\
		.func = NULL,						\
	};								\
	ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
#define DEFINE_STATIC_CALL_RET0(name, _func)				\
	DECLARE_STATIC_CALL(name, _func);				\
	struct static_call_key STATIC_CALL_KEY(name) = {		\
		.func = __static_call_return0,				\
	};								\
	ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)
#define static_call_cond(name)	(void)__static_call(name)
static inline
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
{
	cpus_read_lock();
	WRITE_ONCE(key->func, func);
	arch_static_call_transform(NULL, tramp, func, false);
	cpus_read_unlock();
}
static inline int static_call_text_reserved(void *start, void *end)
{
	return 0;
}
extern long __static_call_return0(void);
#define EXPORT_STATIC_CALL(name)					\
	EXPORT_SYMBOL(STATIC_CALL_KEY(name));				\
	EXPORT_SYMBOL(STATIC_CALL_TRAMP(name))
#define EXPORT_STATIC_CALL_GPL(name)					\
	EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name));			\
	EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))
/* Leave the key unexported, so modules can't change static call targets: */
#define EXPORT_STATIC_CALL_TRAMP(name)					\
	EXPORT_SYMBOL(STATIC_CALL_TRAMP(name))
#define EXPORT_STATIC_CALL_TRAMP_GPL(name)				\
	EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name))
#else /* Generic implementation */
static inline int static_call_init(void) { return 0; }
static inline long __static_call_return0(void)
{
	return 0;
}
#define __DEFINE_STATIC_CALL(name, _func, _func_init)			\
	DECLARE_STATIC_CALL(name, _func);				\
	struct static_call_key STATIC_CALL_KEY(name) = {		\
		.func = _func_init,					\
	}
#define DEFINE_STATIC_CALL(name, _func)					\
	__DEFINE_STATIC_CALL(name, _func, _func)
#define DEFINE_STATIC_CALL_NULL(name, _func)				\
	__DEFINE_STATIC_CALL(name, _func, NULL)
#define DEFINE_STATIC_CALL_RET0(name, _func)				\
	__DEFINE_STATIC_CALL(name, _func, __static_call_return0)
static inline void __static_call_nop(void) { }
/*
 * This horrific hack takes care of two things:
 *
 *  - it ensures the compiler will only load the function pointer ONCE,
 *    which avoids a reload race.
 *
 *  - it ensures the argument evaluation is unconditional, similar
 *    to the HAVE_STATIC_CALL variant.
 *
 * Sadly current GCC/Clang (10 for both) do not optimize this properly
 * and will emit an indirect call for the NULL case :-(
 */
#define __static_call_cond(name)					\
({									\
	void *func = READ_ONCE(STATIC_CALL_KEY(name).func);		\
	if (!func)							\
		func = &__static_call_nop;				\
	(typeof(STATIC_CALL_TRAMP(name))*)func;				\
})
#define static_call_cond(name)	(void)__static_call_cond(name)
static inline
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
{
	WRITE_ONCE(key->func, func);
}
static inline int static_call_text_reserved(void *start, void *end)
{
	return 0;
}
#define EXPORT_STATIC_CALL(name)	EXPORT_SYMBOL(STATIC_CALL_KEY(name))
#define EXPORT_STATIC_CALL_GPL(name)	EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name))
#endif /* CONFIG_HAVE_STATIC_CALL */
#endif /* _LINUX_STATIC_CALL_H */
 |